//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <climits>
#include <map>
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
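///
/// For example (register names illustrative):
///   ldr r4, [r1]
///   ldr r5, [r1, #4]
///   ldr r6, [r1, #8]
/// =>
///   ldmia r1, {r4, r5, r6}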

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
                          const MemOpQueue &MemOps, unsigned DefReg,
                          unsigned RangeBegin, unsigned RangeEnd);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc dl, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static bool definesCPSR(const MachineInstr *MI) {
  for (const auto &MO : MI->operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has a live CPSR def, then it's not safe to fold it
      // into a load / store.
      return true;
  }

  return false;
}

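/// Return the signed byte offset encoded in the addressing-mode immediate
/// operand of a load / store instruction handled by this pass.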
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4 (the operand counts words).
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
    : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}

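/// Map a single load / store opcode and an addressing submode to the
/// matching load / store multiple opcode, or 0 if no such form exists.
/// Also updates the corresponding statistic counter.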
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

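/// Return the scale factor that converts a word count into the
/// immediate-offset units of the given Thumb1 load / store opcode: word
/// accesses encode their offset in words (scale 1), halfword accesses in
/// halfwords (scale 2), and byte accesses in bytes (scale 4).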
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
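///
/// e.g. (registers illustrative) if writeback advanced the base by two
/// words, a later "tLDRi r4, r1, #4" (byte offset 16) is rewritten to
/// "tLDRi r4, r1, #2" (byte offset 8).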
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc dl, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB
  // before the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = MBBI->getOperand(0).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
        .addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4)
      .addImm(Pred).addReg(PredReg);
  }
}

/// MergeOps - Create and insert a LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that will be loaded /
/// stored. Returns true if the transformation is done.
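///
/// A sketch of the nonzero-offset case (registers illustrative): with more
/// than two loads, a new base is materialized first,
///   ldr r4, [r0, #8]
///   ldr r5, [r0, #12]
///   ldr r6, [r0, #16]
/// =>
///   add r6, r0, #8
///   ldmia r6, {r4, r5, r6}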
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1)
    for (unsigned I = 0; I < NumRegs; ++I)
      if (Base == Regs[I].first) {
        assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
        if (Opcode == ARM::tLDRi) {
          Writeback = false;
          break;
        } else if (Opcode == ARM::tSTRi) {
          return false;
        }
      }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. when merging
    // more than two loads / stores.
    if (NumRegs <= 2)
      return false;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, just reuse one of the destination registers as the
      // new base.
      NewBase = Regs[NumRegs-1].first;
    } else {
      // Otherwise use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8  : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return false; // Probably not worth it then.

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return false;
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(BaseKill));
        } else
          BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(BaseKill))
            .addImm(Pred).addReg(PredReg);

        // Set up BaseKill and Base correctly to insert the ADDS/SUBS below.
        Base = NewBase;
        BaseKill = false;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return false;

  MachineInstrBuilder MIB;

  if (Writeback) {
    if (Opcode == ARM::tLDMIA)
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;

    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset it.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}

/// \brief Find all instructions using a given imp-def within a range.
///
/// We are trying to combine a range of instructions, one of which (located at
/// position RangeBegin) implicitly defines a register. The final LDM/STM will
/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
/// and RangeEnd must be modified to use an undefined value.
///
/// The live range continues until we find a second definition or one of the
/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
/// we must consider all uses and decide which are relevant in a second pass.
void ARMLoadStoreOpt::findUsesOfImpDef(
    SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
    unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
  std::map<unsigned, MachineOperand *> Uses;
  unsigned LastLivePos = RangeEnd;

  // First we find all uses of this register with Position between RangeBegin
  // and RangeEnd; any or all of these could be uses of a definition at
  // RangeBegin. We also record the latest position a definition at RangeBegin
  // would be considered live.
  for (unsigned i = 0; i < MemOps.size(); ++i) {
    MachineInstr &MI = *MemOps[i].MBBI;
    unsigned MIPosition = MemOps[i].Position;
    if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
      continue;

    // If this instruction defines the register, then any later use will be of
    // that definition rather than ours.
    if (MI.definesRegister(DefReg))
      LastLivePos = std::min(LastLivePos, MIPosition);

    MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
    if (!UseOp)
      continue;

    // If this instruction kills the register then (assuming liveness is
    // correct when we start) we don't need to think about anything after here.
    if (UseOp->isKill())
      LastLivePos = std::min(LastLivePos, MIPosition);

    Uses[MIPosition] = UseOp;
  }

  // Now we traverse the list of all uses, and append the ones that actually
  // use our definition to the requested list.
  for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
                                                      E = Uses.end();
       I != E; ++I) {
    // The list is sorted by position, so once we've found one out of range
    // there will be no more to consider.
    if (I->first > LastLivePos)
      break;
    UsesOfImpDefs.push_back(I->second);
  }
}

/// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
/// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  SmallVector<MachineOperand *, 8> UsesOfImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);

      // There may be other uses of the definition between this instruction and
      // the eventual LDM/STM position. These should be marked undef if the
      // merge takes place.
      findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
                       insertPos);
    }
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(std::prev(Loc));

  // In gathering loads together, we may have moved the imp-def of a register
  // past one of its uses. This is OK, since we know better than the rest of
  // LLVM what's OK with ARM loads and stores; but we still have to adjust the
  // affected uses.
  for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
                                                   E = UsesOfImpDefs.end();
                                                   I != E; ++I)
    (*I)->setIsUndef();

  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }

  // Update memOps offsets, since they may have been modified by MergeOps.
  for (auto &MemOp : memOps) {
    MemOp.Offset = getMemoryOpOffset(MemOp.MBBI);
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                         unsigned Base, int Opcode, unsigned Size,
                         ARMCC::CondCodes Pred, unsigned PredReg,
                         unsigned Scratch, MemOpQueue &MemOps,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;
  bool BaseKill = false;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1)) &&
        // On Swift we don't want vldm/vstm to start with an odd register
        // number because Q register unaligned vldm/vstm need more uops.
        (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      // We need to compute BaseKill here because the MemOps may have been
      // reordered.
      BaseKill = Loc->killsRegister(Base);

      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base,
                     BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position) {
      insertAfter = i;
      Loc = MemOps[i].MBBI;
    }
  }

  BaseKill = Loc->killsRegister(Base);
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

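/// Return true if MI subtracts exactly Bytes from Base into Base under the
/// given predicate (and, for the flag-setting candidates, leaves no live
/// CPSR definition), making it foldable as a base-register writeback.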
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tSUBi8:
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset is nonzero and, if a limit is given, within it.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
                    MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

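/// Return true if MI adds exactly Bytes to Base into Base under the given
/// predicate (and, for the flag-setting candidates, leaves no live CPSR
/// definition), making it foldable as a base-register writeback.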
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tADDi8:
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
  // fallthrough
  case ARM::tADDspi:
    break;
  }

  // Make sure the offset is nonzero and, if a limit is given, within it.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
                    MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm() * Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

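/// Return the number of bytes an instruction transfers: the access size for
/// single loads / stores, the register count times the access size for
/// load / store multiples, or 0 for opcodes this pass does not handle.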
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

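/// Map a load / store multiple opcode and addressing submode to the
/// equivalent base-updating (writeback) opcode.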
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold a preceding/trailing inc/dec of the base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

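/// Return the pre-indexed (writeback) form of a single load / store opcode
/// for the given add / sub direction. VLDR / VSTR have no writeback forms,
/// so they map to single-register updating VLDM / VSTM instead.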
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

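/// Return the post-indexed (writeback) form of a single load / store opcode
/// for the given add / sub direction; VLDR / VSTR again map to
/// single-register updating VLDM / VSTM.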
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
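///
/// ldr r1, [r0]
/// r0 := r0 + 4
/// =>
/// ldr r1, [r0], #4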
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit, Pred,
                                   PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // Unless the instruction has exactly one memory operand, conservatively
  // assume the access is unaligned, volatile and unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(std::prev(Loc));
}

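/// Emit a single LDR or STR; used when an illegal LDRD / STRD is split into
/// two separate instructions.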
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

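/// FixInvalidRegPairOp - Expand an LDRD / STRD whose register pair cannot be
/// legally encoded (the first register must be even-numbered and the second
/// must be the consecutive odd register), or that falls afoul of the
/// Cortex-M3 602117 erratum, into either an LDM / STM or a pair of LDR / STR
/// instructions.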
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
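      // e.g. ldrd r1, r2, [r0]  =>  ldm r0, {r1, r2}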
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = std::prev(MBBI);
    } else {
      // Split into two instructions.
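      // e.g. ldrd r1, r2, [r0, #4]  =>  ldr r1, [r0, #4]
      //                                 ldr r2, [r0, #8]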
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = std::prev(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
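/// For example:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldm r4, {r0, r1, r2}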
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator, 4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates the write-after-write (WAW) dependence. The same
      // applies to str. Try to merge the inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else {
      TryMerge = true;
    }

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);

        // Find a scratch register.
        unsigned Scratch =
          RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);

        // Process the load / store instructions.
        RS->forward(std::prev(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance,
                                         MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(std::prev(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(std::prev(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into the pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = std::prev(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
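/// For example (when the move is safe and profitable):
///   ldr r0, [r4]
///   add r5, r5, #1
///   ldr r1, [r4, #4]
/// =>
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   add r5, r5, #1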

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

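/// IsSafeAndProfitableToMove - Decide whether it is safe to move the memory
/// ops in MemOps next to each other (no intervening call, terminator, side
/// effect, aliasing load / store, or redefinition of the base register
/// between I and E) and whether the estimated register pressure increase is
/// acceptable.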
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }
  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

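/// CanFormLdStDWord - Check whether Op0 and Op1, two i32 loads or stores off
/// the same base register at consecutive offsets, can be combined into a
/// single LDRD / STRD, and compute the operands of the new instruction if so.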
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }
  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. At the moment, we ignore the memory operand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

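/// RescheduleOps - Move loads / stores of the same base register next to one
/// another (loads up toward the first one, stores down toward the last one)
/// so they are more likely to be merged later, forming an LDRD / STRD pair
/// directly when exactly two adjacent ops qualify.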
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in descending order, so the op with the lowest offset
  // ends up at the back).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

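/// RescheduleLoadStoreInstrs - Scan the basic block a region at a time
/// (stopping at calls and terminators), collect loads and stores by base
/// register, and reschedule each group with more than one candidate.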
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
