ARMLoadStoreOptimizer.cpp revision 0e5233a9e5ee9385c6a940e3985194d77bee0bbb
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMBaseRegisterInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/Target/TargetData.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Target/TargetRegisterInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/STLExtras.h"
39#include "llvm/ADT/SmallPtrSet.h"
40#include "llvm/ADT/SmallSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43using namespace llvm;
44
// Pass-wide statistics, reported when LLVM is run with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
59
namespace {
  /// ARMLoadStoreOpt - Post-register-allocation pass that merges runs of
  /// single load / store instructions into ldm / stm (and vldm / vstm)
  /// instructions, and folds base-register updates into writeback forms.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function target objects, initialized in
    // runOnMachineFunction.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;   // Used to find scratch registers for new bases.
    bool isThumb2;      // True when compiling Thumb2 code.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store being tracked for
    /// merging into a load / store multiple.
    struct MemOpQueueEntry {
      int Offset;        // Immediate offset from the common base register.
      unsigned Reg;      // Register loaded or stored.
      bool isKill;       // True if the memop kills Reg.
      unsigned Position; // Relative ordering index within the block.
      MachineBasicBlock::iterator MBBI;  // The load / store instruction.
      bool Merged;       // True once folded into a multiple instruction.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Emit a single LDM/STM/VLDM/VSTM covering the (register, isKill)
    // pairs in Regs; returns true if the instruction was created.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Run MergeOps over memOps[memOpsBegin, memOpsEnd) and, on success,
    // erase the merged memops and keep the queue / Merges list consistent.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Partition the memop queue into maximal mergeable runs.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    // Fold a neighboring base inc/dec into a single load / store as a
    // pre-/post-indexed form.
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    // Fold a neighboring base inc/dec into a load / store multiple as its
    // writeback (_UPD) form.
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
134
/// getLoadStoreMultipleOpcode - Map a single-register load / store opcode
/// plus an addressing submode onto the matching load / store multiple
/// opcode.  Returns 0 when no plain multiple form exists for the submode
/// (the VFP db forms only exist as base-updating *_UPD variants).
///
/// NOTE(review): the statistics counters are bumped on every call, even
/// when 0 is returned or when the caller uses this purely as a support
/// query (see MergeOps) and later abandons the merge — so the
/// "instructions generated" counts may be overstated; confirm before
/// relying on them.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 LDM/STM only support the ia and db submodes.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
202
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode
/// (ia / ib / da / db) encoded by a load / store multiple opcode,
/// including its _UPD (writeback) and _RET variants.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
259
260static bool isT2i32Load(unsigned Opc) {
261  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
262}
263
264static bool isi32Load(unsigned Opc) {
265  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
266}
267
268static bool isT2i32Store(unsigned Opc) {
269  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
270}
271
272static bool isi32Store(unsigned Opc) {
273  return Opc == ARM::STRi12 || isT2i32Store(Opc);
274}
275
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
///
/// Offset is the offset of the first memop relative to Base; Scratch is a
/// register (or 0) that may be clobbered to materialize a new base; Regs
/// holds (register, isKill) pairs in ascending register order.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Select the addressing submode that matches the starting offset:
  // ia for 0, ib for +4, da for -4*NumRegs+4, db for -4*NumRegs.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base (its old value is overwritten by the load
      // anyway).
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    // Emit NewBase = Base +/- Offset with the same predicate as the memops.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as a shifter-operand immediate for the
    // ADD/SUB we are about to create.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  // Register-list operands are defs for loads, uses for stores.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
355
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
//
// The memops in [memOpsBegin, memOpsEnd) are merged into one instruction
// inserted after memOps[insertAfter]; on success the originals are erased
// and their queue entries redirected to the merged instruction.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.  Scan the memops *outside* the merged range: if one of
  // them kills a register before the insertion point, the kill flag must
  // migrate to the merged instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      // Skip over the range being merged.
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;  // Remember which memop currently holds the kill.
    }
  }

  // Build the (register, isKill) list for the merged instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge, inserting just after memOps[insertAfter].
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    // The original single load / store is now dead.
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
425
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
///
/// Starting at MemOps[SIndex], extends a run of memops whose offsets step
/// by Size and whose register numbers ascend (for VFP, the registers must
/// also be consecutive and within the per-instruction register limit).
/// Each maximal run is merged via MergeOpsUpdate, then the routine recurses
/// on the remainder of the queue.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;           // Starting offset of the current run.
  unsigned insertAfter = SIndex;  // Latest-positioned memop seen so far.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for undef regs guarantees the ascending-order checks fail.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      // Memop fits; extend the current run.
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Start a fresh run at the memop that broke this one.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // The merged instruction must be inserted after the latest memop.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The run reached the end of the queue; merge everything from SIndex on.
  // Transfer the base kill flag if the first memop killed the base.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
499
500static bool definesCPSR(MachineInstr *MI) {
501  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
502    const MachineOperand &MO = MI->getOperand(i);
503    if (!MO.isReg())
504      continue;
505    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
506      // If the instruction has live CPSR def, then it's not safe to fold it
507      // into load / store.
508      return true;
509  }
510
511  return false;
512}
513
514static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
515                                unsigned Bytes, unsigned Limit,
516                                ARMCC::CondCodes Pred, unsigned PredReg) {
517  unsigned MyPredReg = 0;
518  if (!MI)
519    return false;
520
521  bool CheckCPSRDef = false;
522  switch (MI->getOpcode()) {
523  default: return false;
524  case ARM::t2SUBri:
525  case ARM::SUBri:
526    CheckCPSRDef = true;
527  // fallthrough
528  case ARM::tSUBspi:
529    break;
530  }
531
532  // Make sure the offset fits in 8 bits.
533  if (Bytes == 0 || (Limit && Bytes >= Limit))
534    return false;
535
536  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
537  if (!(MI->getOperand(0).getReg() == Base &&
538        MI->getOperand(1).getReg() == Base &&
539        (MI->getOperand(2).getImm()*Scale) == Bytes &&
540        llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
541        MyPredReg == PredReg))
542    return false;
543
544  return CheckCPSRDef ? !definesCPSR(MI) : true;
545}
546
547static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
548                                unsigned Bytes, unsigned Limit,
549                                ARMCC::CondCodes Pred, unsigned PredReg) {
550  unsigned MyPredReg = 0;
551  if (!MI)
552    return false;
553
554  bool CheckCPSRDef = false;
555  switch (MI->getOpcode()) {
556  default: return false;
557  case ARM::t2ADDri:
558  case ARM::ADDri:
559    CheckCPSRDef = true;
560  // fallthrough
561  case ARM::tADDspi:
562    break;
563  }
564
565  if (Bytes == 0 || (Limit && Bytes >= Limit))
566    // Make sure the offset fits in 8 bits.
567    return false;
568
569  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
570  if (!(MI->getOperand(0).getReg() == Base &&
571        MI->getOperand(1).getReg() == Base &&
572        (MI->getOperand(2).getImm()*Scale) == Bytes &&
573        llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
574        MyPredReg == PredReg))
575    return false;
576
577  return CheckCPSRDef ? !definesCPSR(MI) : true;
578}
579
/// getLSMultipleTransferSize - Return the number of bytes MI transfers:
/// a fixed 4 or 8 bytes for the single load / store opcodes, or the
/// register-list length times the per-register size for the load / store
/// multiple opcodes.  Returns 0 for any other instruction.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // Register-list length = variadic operands beyond the fixed ones in
    // the MCInstrDesc (the +1 presumably accounts for how the desc counts
    // the list — confirm against the .td definitions).
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
615
/// getUpdatingLSMultipleOpcode - Return the base-updating (writeback, _UPD)
/// form of the given load / store multiple opcode for the requested
/// addressing submode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  // Thumb2 and VFP only have ia and db writeback forms.
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
682
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// On success the matched add/sub is erased.  When the erased instruction
/// is the one the caller's iterator I points at, Advance is set to true and
/// I is bumped past it.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.  A decrement *before* the
  // memop flips the submode to its "before" counterpart: ia->db, ib->da.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip intervening debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.  An add/sub *after* the memop
  // that matches the submode's direction becomes the writeback; the
  // submode itself is unchanged.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we are erasing the instruction the caller is about to visit,
      // advance the caller's iterator first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild the memop as its base-updating (_UPD) form; operand 0 is the
  // writeback def of the base register.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
777
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed (writeback) form
/// of a single load / store opcode.  VLDR/VSTR have no indexed forms, so
/// the single-register *_UPD load / store multiple is used instead; Mode
/// then selects between the IA (add) and DB (subtract) variants.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
802
/// getPostIndexedLoadStoreOpcode - Return the post-indexed (writeback) form
/// of a single load / store opcode.  As with the pre-indexed mapping,
/// VLDR/VSTR have no indexed forms, so the single-register *_UPD load /
/// store multiple is used instead; Mode selects the IA (add) or DB
/// (subtract) variant.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
827
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// e.g. "add r1, #4; ldr r0, [r1]" => "ldr r0, [r1, #4]!" (pre-indexed), or
/// "ldr r0, [r1]; add r1, #4" => "ldr r0, [r1], #4" (post-indexed).
/// The matched add/sub is erased and the memory op is replaced in place.
/// Advance/I are updated if the erased instruction is the caller's cursor.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset memory ops can absorb a base update; a nonzero
  // immediate would change the accessed address.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  // (AM5 uses 0: the increment must exactly match the transfer size, which
  // isMatchingDecrement/Increment treat as the only allowed value.)
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  // A preceding "sub base, #bytes" or "add base, #bytes" becomes a
  // pre-indexed form. (AM5 has no pre-increment form, only decrement.)
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  // A trailing add/sub of the base becomes a post-indexed form.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If we are about to erase the instruction the caller's iterator points
      // at, advance the caller past it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      // The pre-indexed immediate forms take a signed byte offset ...
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // ... while the remaining AM2 forms take an encoded AM2 opcode plus a
        // (vestigial) zero offset register.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  // Remove the original (non-updating) memory op.
  MBB.erase(MBBI);

  return true;
}
965
966/// isMemoryOp - Returns true if instruction is a memory operation that this
967/// pass is capable of operating on.
968static bool isMemoryOp(const MachineInstr *MI) {
969  // When no memory operands are present, conservatively assume unaligned,
970  // volatile, unfoldable.
971  if (!MI->hasOneMemOperand())
972    return false;
973
974  const MachineMemOperand *MMO = *MI->memoperands_begin();
975
976  // Don't touch volatile memory accesses - we may be changing their order.
977  if (MMO->isVolatile())
978    return false;
979
980  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
981  // not.
982  if (MMO->getAlignment() < 4)
983    return false;
984
985  // str <undef> could probably be eliminated entirely, but for now we just want
986  // to avoid making a mess of it.
987  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
988  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
989      MI->getOperand(0).isUndef())
990    return false;
991
992  // Likewise don't mess with references to undefined addresses.
993  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
994      MI->getOperand(1).isUndef())
995    return false;
996
997  int Opcode = MI->getOpcode();
998  switch (Opcode) {
999  default: break;
1000  case ARM::VLDRS:
1001  case ARM::VSTRS:
1002    return MI->getOperand(1).isReg();
1003  case ARM::VLDRD:
1004  case ARM::VSTRD:
1005    return MI->getOperand(1).isReg();
1006  case ARM::LDRi12:
1007  case ARM::STRi12:
1008  case ARM::t2LDRi8:
1009  case ARM::t2LDRi12:
1010  case ARM::t2STRi8:
1011  case ARM::t2STRi12:
1012    return MI->getOperand(1).isReg();
1013  }
1014  return false;
1015}
1016
1017/// AdvanceRS - Advance register scavenger to just before the earliest memory
1018/// op that is being merged.
1019void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1020  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1021  unsigned Position = MemOps[0].Position;
1022  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1023    if (MemOps[i].Position < Position) {
1024      Position = MemOps[i].Position;
1025      Loc = MemOps[i].MBBI;
1026    }
1027  }
1028
1029  if (Loc != MBB.begin())
1030    RS->forward(prior(Loc));
1031}
1032
1033static int getMemoryOpOffset(const MachineInstr *MI) {
1034  int Opcode = MI->getOpcode();
1035  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1036  unsigned NumOperands = MI->getDesc().getNumOperands();
1037  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1038
1039  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1040      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1041      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1042      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1043    return OffField;
1044
1045  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1046    : ARM_AM::getAM5Offset(OffField) * 4;
1047  if (isAM3) {
1048    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1049      Offset = -Offset;
1050  } else {
1051    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1052      Offset = -Offset;
1053  }
1054  return Offset;
1055}
1056
1057static void InsertLDR_STR(MachineBasicBlock &MBB,
1058                          MachineBasicBlock::iterator &MBBI,
1059                          int Offset, bool isDef,
1060                          DebugLoc dl, unsigned NewOpc,
1061                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1062                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1063                          bool OffKill, bool OffUndef,
1064                          ARMCC::CondCodes Pred, unsigned PredReg,
1065                          const TargetInstrInfo *TII, bool isT2) {
1066  if (isDef) {
1067    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1068                                      TII->get(NewOpc))
1069      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1070      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1071    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1072  } else {
1073    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1074                                      TII->get(NewOpc))
1075      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1076      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1077    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1078  }
1079}
1080
/// FixInvalidRegPairOp - LDRD / STRD require an even/odd consecutive register
/// pair. If this instruction's pair is not valid (or hits the Cortex-M3
/// LDRD erratum), rewrite it as an LDM/STM or as two single loads/stores.
/// Returns true and repositions MBBI at the first replacement instruction if
/// a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A legal even/odd pair that doesn't hit the erratum needs no fixing.
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // Capture the liveness flags of the original operands so they can be
    // transferred onto the replacement instruction(s).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // t2 variants have no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd (higher-address) half first, so the base survives
        // until the even half — which clobbers it — is loaded last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Delete the original LDRD/STRD and leave MBBI on the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1192
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State describing the chain of mergeable memory ops currently being
  // collected: they must share base register, opcode, size and predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;  // Running instruction index within the block.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Fix illegal LDRD/STRD register pairs first; that may replace the
    // instruction under MBBI, so restart processing at the replacement.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue. The queue is kept sorted by
          // ascending offset via insertion.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    // Debug values never terminate a chain; otherwise, an instruction that
    // was not appended to the queue (Advance == false) ends the chain.
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        // NOTE(review): every entry in Merges is counted here in addition to
        // the ++NumMerges above; NumMerges only gates the bool return value.
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state so the next memory op starts a fresh chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1355
1356/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1357/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1358/// directly restore the value of LR into pc.
1359///   ldmfd sp!, {..., lr}
1360///   bx lr
1361/// or
1362///   ldmfd sp!, {..., lr}
1363///   mov pc, lr
1364/// =>
1365///   ldmfd sp!, {..., pc}
1366bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1367  if (MBB.empty()) return false;
1368
1369  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1370  if (MBBI != MBB.begin() &&
1371      (MBBI->getOpcode() == ARM::BX_RET ||
1372       MBBI->getOpcode() == ARM::tBX_RET ||
1373       MBBI->getOpcode() == ARM::MOVPCLR)) {
1374    MachineInstr *PrevMI = prior(MBBI);
1375    unsigned Opcode = PrevMI->getOpcode();
1376    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1377        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1378        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1379      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1380      if (MO.getReg() != ARM::LR)
1381        return false;
1382      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1383      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1384              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1385      PrevMI->setDesc(TII->get(NewOpc));
1386      MO.setReg(ARM::PC);
1387      PrevMI->copyImplicitOps(&*MBBI);
1388      MBB.erase(MBBI);
1389      return true;
1390    }
1391  }
1392  return false;
1393}
1394
1395bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1396  const TargetMachine &TM = Fn.getTarget();
1397  AFI = Fn.getInfo<ARMFunctionInfo>();
1398  TII = TM.getInstrInfo();
1399  TRI = TM.getRegisterInfo();
1400  STI = &TM.getSubtarget<ARMSubtarget>();
1401  RS = new RegScavenger();
1402  isThumb2 = AFI->isThumb2Function();
1403
1404  bool Modified = false;
1405  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1406       ++MFI) {
1407    MachineBasicBlock &MBB = *MFI;
1408    Modified |= LoadStoreMultipleOpti(MBB);
1409    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1410      Modified |= MergeReturnIntoLDM(MBB);
1411  }
1412
1413  delete RS;
1414  return Modified;
1415}
1416
1417
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
1421
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks, set up once per function in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether the pair Op0/Op1 can be combined into
    // a single LDRD/STRD; on success fills in the opcode, registers, encoded
    // offset, predicate and Thumb2 flag for the combined instruction.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move the queued loads/stores of one base register next
    // to each other when safe and profitable.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver for the rescheduling.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1455
1456bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1457  TD  = Fn.getTarget().getTargetData();
1458  TII = Fn.getTarget().getInstrInfo();
1459  TRI = Fn.getTarget().getRegisterInfo();
1460  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1461  MRI = &Fn.getRegInfo();
1462  MF  = &Fn;
1463
1464  bool Modified = false;
1465  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1466       ++MFI)
1467    Modified |= RescheduleLoadStoreInstrs(MFI);
1468
1469  return Modified;
1470}
1471
1472static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1473                                      MachineBasicBlock::iterator I,
1474                                      MachineBasicBlock::iterator E,
1475                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1476                                      SmallSet<unsigned, 4> &MemRegs,
1477                                      const TargetRegisterInfo *TRI) {
1478  // Are there stores / loads / calls between them?
1479  // FIXME: This is overly conservative. We should make use of alias information
1480  // some day.
1481  SmallSet<unsigned, 4> AddedRegPressure;
1482  while (++I != E) {
1483    if (I->isDebugValue() || MemOps.count(&*I))
1484      continue;
1485    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1486      return false;
1487    if (isLd && I->mayStore())
1488      return false;
1489    if (!isLd) {
1490      if (I->mayLoad())
1491        return false;
1492      // It's not safe to move the first 'str' down.
1493      // str r1, [r0]
1494      // strh r5, [r0]
1495      // str r4, [r0, #+4]
1496      if (I->mayStore())
1497        return false;
1498    }
1499    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1500      MachineOperand &MO = I->getOperand(j);
1501      if (!MO.isReg())
1502        continue;
1503      unsigned Reg = MO.getReg();
1504      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1505        return false;
1506      if (Reg != Base && !MemRegs.count(Reg))
1507        AddedRegPressure.insert(Reg);
1508    }
1509  }
1510
1511  // Estimate register pressure increase due to the transformation.
1512  if (MemRegs.size() <= 4)
1513    // Ok if we are moving small number of instructions.
1514    return true;
1515  return AddedRegPressure.size() <= MemRegs.size() * 2;
1516}
1517
1518
1519/// Copy Op0 and Op1 operands into a new array assigned to MI.
1520static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1521                                   MachineInstr *Op1) {
1522  assert(MI->memoperands_empty() && "expected a new machineinstr");
1523  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1524    + (Op1->memoperands_end() - Op1->memoperands_begin());
1525
1526  MachineFunction *MF = MI->getParent()->getParent();
1527  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1528  MachineSDNode::mmo_iterator MemEnd =
1529    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1530  MemEnd =
1531    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1532  MI->setMemRefs(MemBegin, MemEnd);
1533}
1534
/// CanFormLdStDWord - Return true if the pair of loads / stores Op0 and Op1
/// (which access adjacent words) can be combined into a single LDRD / STRD.
/// On success, the out-parameters describe the combined instruction: its
/// opcode, the two data registers, the base register, the encoded offset,
/// the predicate, and whether it is a Thumb2 form.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Scale is the granularity of the encoded immediate (bytes per unit).
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // Also reject volatile accesses and accesses without an IR value to check.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a signed, Scale-aligned immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM LDRD/STRD use AM3: unsigned magnitude plus an add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct data registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1606
1607namespace {
1608  struct OffsetCompare {
1609    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1610      int LOffset = getMemoryOpOffset(LHS);
1611      int ROffset = getMemoryOpOffset(RHS);
1612      assert(LHS == RHS || LOffset != ROffset);
1613      return LOffset > ROffset;
1614    }
1615  };
1616}
1617
1618bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1619                                 SmallVector<MachineInstr*, 4> &Ops,
1620                                 unsigned Base, bool isLd,
1621                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1622  bool RetVal = false;
1623
1624  // Sort by offset (in reverse order).
1625  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1626
1627  // The loads / stores of the same base are in order. Scan them from first to
1628  // last and check for the following:
1629  // 1. Any def of base.
1630  // 2. Any gaps.
1631  while (Ops.size() > 1) {
1632    unsigned FirstLoc = ~0U;
1633    unsigned LastLoc = 0;
1634    MachineInstr *FirstOp = 0;
1635    MachineInstr *LastOp = 0;
1636    int LastOffset = 0;
1637    unsigned LastOpcode = 0;
1638    unsigned LastBytes = 0;
1639    unsigned NumMove = 0;
1640    for (int i = Ops.size() - 1; i >= 0; --i) {
1641      MachineInstr *Op = Ops[i];
1642      unsigned Loc = MI2LocMap[Op];
1643      if (Loc <= FirstLoc) {
1644        FirstLoc = Loc;
1645        FirstOp = Op;
1646      }
1647      if (Loc >= LastLoc) {
1648        LastLoc = Loc;
1649        LastOp = Op;
1650      }
1651
1652      unsigned LSMOpcode
1653        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1654      if (LastOpcode && LSMOpcode != LastOpcode)
1655        break;
1656
1657      int Offset = getMemoryOpOffset(Op);
1658      unsigned Bytes = getLSMultipleTransferSize(Op);
1659      if (LastBytes) {
1660        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1661          break;
1662      }
1663      LastOffset = Offset;
1664      LastBytes = Bytes;
1665      LastOpcode = LSMOpcode;
1666      if (++NumMove == 8) // FIXME: Tune this limit.
1667        break;
1668    }
1669
1670    if (NumMove <= 1)
1671      Ops.pop_back();
1672    else {
1673      SmallPtrSet<MachineInstr*, 4> MemOps;
1674      SmallSet<unsigned, 4> MemRegs;
1675      for (int i = NumMove-1; i >= 0; --i) {
1676        MemOps.insert(Ops[i]);
1677        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1678      }
1679
1680      // Be conservative, if the instructions are too far apart, don't
1681      // move them. We want to limit the increase of register pressure.
1682      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1683      if (DoMove)
1684        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1685                                           MemOps, MemRegs, TRI);
1686      if (!DoMove) {
1687        for (unsigned i = 0; i != NumMove; ++i)
1688          Ops.pop_back();
1689      } else {
1690        // This is the new location for the loads / stores.
1691        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1692        while (InsertPos != MBB->end()
1693               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1694          ++InsertPos;
1695
1696        // If we are moving a pair of loads / stores, see if it makes sense
1697        // to try to allocate a pair of registers that can form register pairs.
1698        MachineInstr *Op0 = Ops.back();
1699        MachineInstr *Op1 = Ops[Ops.size()-2];
1700        unsigned EvenReg = 0, OddReg = 0;
1701        unsigned BaseReg = 0, PredReg = 0;
1702        ARMCC::CondCodes Pred = ARMCC::AL;
1703        bool isT2 = false;
1704        unsigned NewOpc = 0;
1705        int Offset = 0;
1706        DebugLoc dl;
1707        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1708                                             EvenReg, OddReg, BaseReg,
1709                                             Offset, PredReg, Pred, isT2)) {
1710          Ops.pop_back();
1711          Ops.pop_back();
1712
1713          const MCInstrDesc &MCID = TII->get(NewOpc);
1714          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1715          MRI->constrainRegClass(EvenReg, TRC);
1716          MRI->constrainRegClass(OddReg, TRC);
1717
1718          // Form the pair instruction.
1719          if (isLd) {
1720            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1721              .addReg(EvenReg, RegState::Define)
1722              .addReg(OddReg, RegState::Define)
1723              .addReg(BaseReg);
1724            // FIXME: We're converting from LDRi12 to an insn that still
1725            // uses addrmode2, so we need an explicit offset reg. It should
1726            // always by reg0 since we're transforming LDRi12s.
1727            if (!isT2)
1728              MIB.addReg(0);
1729            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1730            concatenateMemOperands(MIB, Op0, Op1);
1731            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1732            ++NumLDRDFormed;
1733          } else {
1734            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1735              .addReg(EvenReg)
1736              .addReg(OddReg)
1737              .addReg(BaseReg);
1738            // FIXME: We're converting from LDRi12 to an insn that still
1739            // uses addrmode2, so we need an explicit offset reg. It should
1740            // always by reg0 since we're transforming STRi12s.
1741            if (!isT2)
1742              MIB.addReg(0);
1743            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1744            concatenateMemOperands(MIB, Op0, Op1);
1745            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1746            ++NumSTRDFormed;
1747          }
1748          MBB->erase(Op0);
1749          MBB->erase(Op1);
1750
1751          // Add register allocation hints to form register pairs.
1752          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1753          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1754        } else {
1755          for (unsigned i = 0; i != NumMove; ++i) {
1756            MachineInstr *Op = Ops.back();
1757            Ops.pop_back();
1758            MBB->splice(InsertPos, MBB, Op);
1759          }
1760        }
1761
1762        NumLdStMoved += NumMove;
1763        RetVal = true;
1764      }
1765    }
1766  }
1767
1768  return RetVal;
1769}
1770
1771bool
1772ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1773  bool RetVal = false;
1774
1775  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1776  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1777  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1778  SmallVector<unsigned, 4> LdBases;
1779  SmallVector<unsigned, 4> StBases;
1780
1781  unsigned Loc = 0;
1782  MachineBasicBlock::iterator MBBI = MBB->begin();
1783  MachineBasicBlock::iterator E = MBB->end();
1784  while (MBBI != E) {
1785    for (; MBBI != E; ++MBBI) {
1786      MachineInstr *MI = MBBI;
1787      if (MI->isCall() || MI->isTerminator()) {
1788        // Stop at barriers.
1789        ++MBBI;
1790        break;
1791      }
1792
1793      if (!MI->isDebugValue())
1794        MI2LocMap[MI] = ++Loc;
1795
1796      if (!isMemoryOp(MI))
1797        continue;
1798      unsigned PredReg = 0;
1799      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1800        continue;
1801
1802      int Opc = MI->getOpcode();
1803      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1804      unsigned Base = MI->getOperand(1).getReg();
1805      int Offset = getMemoryOpOffset(MI);
1806
1807      bool StopHere = false;
1808      if (isLd) {
1809        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1810          Base2LdsMap.find(Base);
1811        if (BI != Base2LdsMap.end()) {
1812          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1813            if (Offset == getMemoryOpOffset(BI->second[i])) {
1814              StopHere = true;
1815              break;
1816            }
1817          }
1818          if (!StopHere)
1819            BI->second.push_back(MI);
1820        } else {
1821          SmallVector<MachineInstr*, 4> MIs;
1822          MIs.push_back(MI);
1823          Base2LdsMap[Base] = MIs;
1824          LdBases.push_back(Base);
1825        }
1826      } else {
1827        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1828          Base2StsMap.find(Base);
1829        if (BI != Base2StsMap.end()) {
1830          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1831            if (Offset == getMemoryOpOffset(BI->second[i])) {
1832              StopHere = true;
1833              break;
1834            }
1835          }
1836          if (!StopHere)
1837            BI->second.push_back(MI);
1838        } else {
1839          SmallVector<MachineInstr*, 4> MIs;
1840          MIs.push_back(MI);
1841          Base2StsMap[Base] = MIs;
1842          StBases.push_back(Base);
1843        }
1844      }
1845
1846      if (StopHere) {
1847        // Found a duplicate (a base+offset combination that's seen earlier).
1848        // Backtrack.
1849        --Loc;
1850        break;
1851      }
1852    }
1853
1854    // Re-schedule loads.
1855    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1856      unsigned Base = LdBases[i];
1857      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1858      if (Lds.size() > 1)
1859        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1860    }
1861
1862    // Re-schedule stores.
1863    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1864      unsigned Base = StBases[i];
1865      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1866      if (Sts.size() > 1)
1867        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1868    }
1869
1870    if (MBBI != E) {
1871      Base2LdsMap.clear();
1872      Base2StsMap.clear();
1873      LdBases.clear();
1874      StBases.clear();
1875    }
1876  }
1877
1878  return RetVal;
1879}
1880
1881
1882/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1883/// optimization pass.
1884FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1885  if (PreAlloc)
1886    return new ARMPreAllocLoadStoreOpt();
1887  return new ARMLoadStoreOpt();
1888}
1889