// ARMLoadStoreOptimizer.cpp -- revision 805543068eb7407d718b0359f54b342d7094d0ea
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMBaseRegisterInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/Target/TargetData.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Target/TargetRegisterInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/STLExtras.h"
39#include "llvm/ADT/SmallPtrSet.h"
40#include "llvm/ADT/SmallSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43using namespace llvm;
44
// Pass-wide statistics, printed with -stats. The first four count the
// load / store multiple instructions this pass forms; the rest count the
// ldrd/strd-related rewrites performed by the pre- and post-RA passes.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
59
namespace {
  /// ARMLoadStoreOpt - Post-RA pass that merges adjacent loads / stores off
  /// a common base register into ldm / stm (and vldm / vstm) instructions,
  /// and folds base-register increments / decrements into them.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Per-function state; presumably initialized in runOnMachineFunction
    // (its body is outside this chunk) -- confirm before relying on it.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;     // Used to find a scratch register for a new base.
    bool isThumb2;        // True when emitting Thumb2 encodings.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One merge candidate: the memory offset, the
    /// transferred register (plus its kill flag), the instruction's ordinal
    /// position within the block, the instruction itself, and whether it has
    /// already been folded into a load / store multiple.
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Build a single LDM/STM/VLDM/VSTM from the given registers.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Call MergeOps and, on success, fix up kill flags and the MemOps queue.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Greedily merge a run of queued loads / stores into multiples.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    // Fold a neighbouring base add / sub into a single load / store.
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    // Fold a neighbouring base add / sub into a load / store multiple.
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
134
/// getLoadStoreMultipleOpcode - Map a single load / store opcode plus an
/// addressing submode to the corresponding (non-writeback) load / store
/// multiple opcode. Returns 0 for VFP db submodes, which only exist in
/// base-updating (_UPD) form. Note the statistics counter for the opcode
/// class is bumped before the submode is checked, so it is incremented even
/// on the return-0 paths.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 load multiple only supports ia and db submodes.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
202
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia, ib, da,
/// or db) implied by a load / store multiple opcode, covering the plain,
/// _UPD, and _RET variants. Asserts on any opcode that is not a load / store
/// multiple.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
259
260static bool isT2i32Load(unsigned Opc) {
261  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
262}
263
264static bool isi32Load(unsigned Opc) {
265  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
266}
267
268static bool isT2i32Store(unsigned Opc) {
269  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
270}
271
272static bool isi32Store(unsigned Opc) {
273  return Opc == ARM::STRi12 || isT2i32Store(Opc);
274}
275
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done. Offset is the memory
/// offset of the first register relative to Base; Scratch (0 if none) may be
/// consumed to materialize a new base when Offset is nonzero.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  // Choose the submode that makes the starting offset implicit:
  // +4 -> ib, -4*(n-1) -> da, -4*n -> db; anything else needs a new base.
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    // Compute NewBase = Base +/- Offset with an ADD or SUB immediate.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as a shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  // Loads define their transferred registers; stores kill/use them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
355
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. The ops in [memOpsBegin, memOpsEnd) are merged into a single
// instruction inserted after memOps[insertAfter]; kill flags are transferred
// from any other queued op of the same register that precedes the insertion
// point, and the merged-away instructions are erased.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  // Scan the queued ops *outside* [memOpsBegin, memOpsEnd) (the loop skips
  // that range by jumping i from memOpsBegin to memOpsEnd) and remember which
  // register kills occur before the insertion point.
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        // The kill moved into the merged instruction; clear it on the
        // earlier use so the register is not killed twice.
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
425
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions. Starting at MemOps[SIndex], extend the
/// run while offsets stay contiguous and register numbers ascend (and, for
/// VFP, stay consecutive and under the per-instruction register limit); on a
/// break, emit the run so far and recurse on the remainder.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;                 // Offset of the run's first element.
  unsigned insertAfter = SIndex;        // Latest-positioned op seen so far.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for undef operands so the ascending-order checks below fail.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole remaining queue formed one run; merge it, killing the base if
  // the first instruction was its last use.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
499
500static bool definesCPSR(MachineInstr *MI) {
501  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
502    const MachineOperand &MO = MI->getOperand(i);
503    if (!MO.isReg())
504      continue;
505    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
506      // If the instruction has live CPSR def, then it's not safe to fold it
507      // into load / store.
508      return true;
509  }
510
511  return false;
512}
513
514static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
515                                unsigned Bytes, unsigned Limit,
516                                ARMCC::CondCodes Pred, unsigned PredReg) {
517  unsigned MyPredReg = 0;
518  if (!MI)
519    return false;
520
521  bool CheckCPSRDef = false;
522  switch (MI->getOpcode()) {
523  default: return false;
524  case ARM::t2SUBri:
525  case ARM::SUBri:
526    CheckCPSRDef = true;
527  // fallthrough
528  case ARM::tSUBspi:
529    break;
530  }
531
532  // Make sure the offset fits in 8 bits.
533  if (Bytes == 0 || (Limit && Bytes >= Limit))
534    return false;
535
536  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
537  if (!(MI->getOperand(0).getReg() == Base &&
538        MI->getOperand(1).getReg() == Base &&
539        (MI->getOperand(2).getImm()*Scale) == Bytes &&
540        getInstrPredicate(MI, MyPredReg) == Pred &&
541        MyPredReg == PredReg))
542    return false;
543
544  return CheckCPSRDef ? !definesCPSR(MI) : true;
545}
546
547static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
548                                unsigned Bytes, unsigned Limit,
549                                ARMCC::CondCodes Pred, unsigned PredReg) {
550  unsigned MyPredReg = 0;
551  if (!MI)
552    return false;
553
554  bool CheckCPSRDef = false;
555  switch (MI->getOpcode()) {
556  default: return false;
557  case ARM::t2ADDri:
558  case ARM::ADDri:
559    CheckCPSRDef = true;
560  // fallthrough
561  case ARM::tADDspi:
562    break;
563  }
564
565  if (Bytes == 0 || (Limit && Bytes >= Limit))
566    // Make sure the offset fits in 8 bits.
567    return false;
568
569  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
570  if (!(MI->getOperand(0).getReg() == Base &&
571        MI->getOperand(1).getReg() == Base &&
572        (MI->getOperand(2).getImm()*Scale) == Bytes &&
573        getInstrPredicate(MI, MyPredReg) == Pred &&
574        MyPredReg == PredReg))
575    return false;
576
577  return CheckCPSRDef ? !definesCPSR(MI) : true;
578}
579
/// getLSMultipleTransferSize - Return the number of bytes MI transfers:
/// 4 or 8 for single loads / stores, and register-count * element-size for
/// load / store multiples (the register count is the number of operands
/// beyond the fixed ones in the instruction description, plus one).
/// Returns 0 for anything this pass does not handle.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
615
/// getUpdatingLSMultipleOpcode - Map a non-writeback load / store multiple
/// opcode plus the desired addressing submode to the corresponding
/// base-updating (_UPD) opcode. Asserts on unsupported opcode / submode
/// combinations (e.g. Thumb2 and VFP only support ia and db).
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
682
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// On success the neighbouring add / sub and the original multiple are both
/// erased and a _UPD instruction is inserted. Advance / I are updated when
/// the erased neighbour is the caller's current iterator.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction. A preceding decrement is
  // absorbed by shifting the submode down (ia -> db, ib -> da).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction. A following increment (for
  // ia/ib) or decrement (for da/db) matches the submode directly.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // Erasing the caller's current iterator; advance it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild as the _UPD variant: writeback base def, base use, predicate,
  // then the original register list.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
777
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of the given
/// single load / store opcode. VFP loads / stores have no pre-indexed form,
/// so the base-updating load / store multiple opcode for the requested
/// direction (add -> IA, sub -> DB) is returned instead; Mode is only
/// consulted for those VFP cases.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
802
/// getPostIndexedLoadStoreOpcode - Return the post-indexed form of the given
/// single load / store opcode. As with the pre-indexed mapping, VFP loads /
/// stores have no post-indexed form and map to the base-updating load / store
/// multiple opcode for the requested direction instead; Mode is only
/// consulted for those VFP cases.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
827
/// MergeBaseUpdateLoadStore - Fold a preceding/trailing inc/dec of the base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero original offset can absorb the inc/dec into an indexed form.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // Immediate range of the indexed forms: AM2 - 12 bits, thumb2 - 8 bits.
  // (AM5 passes 0; its matching helpers only accept the exact transfer size.)
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction (skipping debug values),
  // which produces a pre-indexed form.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      // The inc/dec is now folded into the memory op; delete it.
      MBB.erase(PrevMBBI);
    }
  }

  // Otherwise try merging with the next instruction, which produces a
  // post-indexed form.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If the caller's iterator points at the instruction we're about to
      // erase, advance it and tell the caller so via Advance.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        // The *_PRE_IMM forms take a plain signed immediate offset.
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Other AM2 forms encode add/sub and shift into the offset operand
        // and carry a (zero) offset-register operand.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  // The original load/store is fully replaced by the indexed form above.
  MBB.erase(MBBI);

  return true;
}
965
966/// isMemoryOp - Returns true if instruction is a memory operation that this
967/// pass is capable of operating on.
968static bool isMemoryOp(const MachineInstr *MI) {
969  // When no memory operands are present, conservatively assume unaligned,
970  // volatile, unfoldable.
971  if (!MI->hasOneMemOperand())
972    return false;
973
974  const MachineMemOperand *MMO = *MI->memoperands_begin();
975
976  // Don't touch volatile memory accesses - we may be changing their order.
977  if (MMO->isVolatile())
978    return false;
979
980  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
981  // not.
982  if (MMO->getAlignment() < 4)
983    return false;
984
985  // str <undef> could probably be eliminated entirely, but for now we just want
986  // to avoid making a mess of it.
987  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
988  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
989      MI->getOperand(0).isUndef())
990    return false;
991
992  // Likewise don't mess with references to undefined addresses.
993  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
994      MI->getOperand(1).isUndef())
995    return false;
996
997  int Opcode = MI->getOpcode();
998  switch (Opcode) {
999  default: break;
1000  case ARM::VLDRS:
1001  case ARM::VSTRS:
1002    return MI->getOperand(1).isReg();
1003  case ARM::VLDRD:
1004  case ARM::VSTRD:
1005    return MI->getOperand(1).isReg();
1006  case ARM::LDRi12:
1007  case ARM::STRi12:
1008  case ARM::t2LDRi8:
1009  case ARM::t2LDRi12:
1010  case ARM::t2STRi8:
1011  case ARM::t2STRi12:
1012    return MI->getOperand(1).isReg();
1013  }
1014  return false;
1015}
1016
1017/// AdvanceRS - Advance register scavenger to just before the earliest memory
1018/// op that is being merged.
1019void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1020  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1021  unsigned Position = MemOps[0].Position;
1022  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1023    if (MemOps[i].Position < Position) {
1024      Position = MemOps[i].Position;
1025      Loc = MemOps[i].MBBI;
1026    }
1027  }
1028
1029  if (Loc != MBB.begin())
1030    RS->forward(prior(Loc));
1031}
1032
1033static int getMemoryOpOffset(const MachineInstr *MI) {
1034  int Opcode = MI->getOpcode();
1035  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1036  unsigned NumOperands = MI->getDesc().getNumOperands();
1037  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1038
1039  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1040      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1041      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1042      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1043    return OffField;
1044
1045  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1046    : ARM_AM::getAM5Offset(OffField) * 4;
1047  if (isAM3) {
1048    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1049      Offset = -Offset;
1050  } else {
1051    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1052      Offset = -Offset;
1053  }
1054  return Offset;
1055}
1056
1057static void InsertLDR_STR(MachineBasicBlock &MBB,
1058                          MachineBasicBlock::iterator &MBBI,
1059                          int Offset, bool isDef,
1060                          DebugLoc dl, unsigned NewOpc,
1061                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1062                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1063                          bool OffKill, bool OffUndef,
1064                          ARMCC::CondCodes Pred, unsigned PredReg,
1065                          const TargetInstrInfo *TII, bool isT2) {
1066  if (isDef) {
1067    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1068                                      TII->get(NewOpc))
1069      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1070      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1071    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1072  } else {
1073    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1074                                      TII->get(NewOpc))
1075      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1076      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1077    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1078  }
1079}
1080
/// FixInvalidRegPairOp - An LDRD/STRD needs an even/odd register pair (and,
/// on Cortex-M3, a base register not equal to the first transfer register —
/// errata 602117).  If this instruction violates those constraints, rewrite
/// it as an LDM/STM or as two single-word memory ops.  Returns true when a
/// rewrite happened, with MBBI repositioned on the first replacement.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A proper even/odd consecutive pair needs no fixing (unless the errata
    // applies).
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For a load the transfer regs are defs (track dead); for a store they
    // are uses (track kill).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Only the ARM-mode form has a separate offset-register operand (#3).
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (higher-address) load first while the base is intact;
        // the even load, which clobbers the base, goes second.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original pair op; leave MBBI on the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1192
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State of the chain currently being accumulated: all queued ops share
  // this base register, opcode, transfer size and predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;  // Ordinal of the current instruction in the block.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Rewrite ill-formed LDRD/STRD first; that replaces the instruction, so
    // re-examine from the replacement.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue, keeping it sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Find the sorted insertion point for this offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    // Advance the iterator; a non-advancing, non-debug instruction ends the
    // chain and forces a merge attempt.
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state for the next run of mergeable ops.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1355
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The last operand is the final register in the load list; the fold is
      // only possible when that register is LR.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Retarget the LDM to load PC instead of LR and delete the return,
      // preserving the return's implicit operands on the LDM.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1394
1395bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1396  const TargetMachine &TM = Fn.getTarget();
1397  AFI = Fn.getInfo<ARMFunctionInfo>();
1398  TII = TM.getInstrInfo();
1399  TRI = TM.getRegisterInfo();
1400  STI = &TM.getSubtarget<ARMSubtarget>();
1401  RS = new RegScavenger();
1402  isThumb2 = AFI->isThumb2Function();
1403
1404  bool Modified = false;
1405  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1406       ++MFI) {
1407    MachineBasicBlock &MBB = *MFI;
1408    Modified |= LoadStoreMultipleOpti(MBB);
1409    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1410      Modified |= MergeReturnIntoLDM(MBB);
1411  }
1412
1413  delete RS;
1414
1415  // Kill flags aren't updated accurately by this pass.
1416  Fn.getRegInfo().invalidateLiveness();
1417
1418  return Modified;
1419}
1420
1421
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
1425
namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function state, initialized in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether Op0/Op1 can be combined into a single
    // LDRD/STRD; fills in the new opcode and operand info on success.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move the queued loads/stores of a common base register
    // next to each other when it is safe and profitable.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver that collects candidate
    // memory ops and invokes RescheduleOps.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1459
1460bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1461  TD  = Fn.getTarget().getTargetData();
1462  TII = Fn.getTarget().getInstrInfo();
1463  TRI = Fn.getTarget().getRegisterInfo();
1464  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1465  MRI = &Fn.getRegInfo();
1466  MF  = &Fn;
1467
1468  bool Modified = false;
1469  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1470       ++MFI)
1471    Modified |= RescheduleLoadStoreInstrs(MFI);
1472
1473  return Modified;
1474}
1475
1476static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1477                                      MachineBasicBlock::iterator I,
1478                                      MachineBasicBlock::iterator E,
1479                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1480                                      SmallSet<unsigned, 4> &MemRegs,
1481                                      const TargetRegisterInfo *TRI) {
1482  // Are there stores / loads / calls between them?
1483  // FIXME: This is overly conservative. We should make use of alias information
1484  // some day.
1485  SmallSet<unsigned, 4> AddedRegPressure;
1486  while (++I != E) {
1487    if (I->isDebugValue() || MemOps.count(&*I))
1488      continue;
1489    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1490      return false;
1491    if (isLd && I->mayStore())
1492      return false;
1493    if (!isLd) {
1494      if (I->mayLoad())
1495        return false;
1496      // It's not safe to move the first 'str' down.
1497      // str r1, [r0]
1498      // strh r5, [r0]
1499      // str r4, [r0, #+4]
1500      if (I->mayStore())
1501        return false;
1502    }
1503    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1504      MachineOperand &MO = I->getOperand(j);
1505      if (!MO.isReg())
1506        continue;
1507      unsigned Reg = MO.getReg();
1508      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1509        return false;
1510      if (Reg != Base && !MemRegs.count(Reg))
1511        AddedRegPressure.insert(Reg);
1512    }
1513  }
1514
1515  // Estimate register pressure increase due to the transformation.
1516  if (MemRegs.size() <= 4)
1517    // Ok if we are moving small number of instructions.
1518    return true;
1519  return AddedRegPressure.size() <= MemRegs.size() * 2;
1520}
1521
1522
1523/// Copy Op0 and Op1 operands into a new array assigned to MI.
1524static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1525                                   MachineInstr *Op1) {
1526  assert(MI->memoperands_empty() && "expected a new machineinstr");
1527  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1528    + (Op1->memoperands_end() - Op1->memoperands_begin());
1529
1530  MachineFunction *MF = MI->getParent()->getParent();
1531  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1532  MachineSDNode::mmo_iterator MemEnd =
1533    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1534  MemEnd =
1535    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1536  MI->setMemRefs(MemBegin, MemEnd);
1537}
1538
/// CanFormLdStDWord - Check whether two word-sized loads / stores (Op0, Op1)
/// can be combined into a single LDRD / STRD.  On success the out-parameters
/// (NewOpc, EvenReg, OddReg, BaseReg, Offset, PredReg, Pred, isT2, dl)
/// describe the replacement instruction.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // Require a single non-volatile memory operand with a known value.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // Thumb2 form: signed offset, limited to +/-(1<<8)*Scale and a multiple
    // of Scale.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM form: addressing mode 3 encodes a magnitude plus add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  // The two transfer registers must be distinct.
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1610
1611namespace {
1612  struct OffsetCompare {
1613    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1614      int LOffset = getMemoryOpOffset(LHS);
1615      int ROffset = getMemoryOpOffset(RHS);
1616      assert(LHS == RHS || LOffset != ROffset);
1617      return LOffset > ROffset;
1618    }
1619  };
1620}
1621
1622bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1623                                 SmallVector<MachineInstr*, 4> &Ops,
1624                                 unsigned Base, bool isLd,
1625                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1626  bool RetVal = false;
1627
1628  // Sort by offset (in reverse order).
1629  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1630
1631  // The loads / stores of the same base are in order. Scan them from first to
1632  // last and check for the following:
1633  // 1. Any def of base.
1634  // 2. Any gaps.
1635  while (Ops.size() > 1) {
1636    unsigned FirstLoc = ~0U;
1637    unsigned LastLoc = 0;
1638    MachineInstr *FirstOp = 0;
1639    MachineInstr *LastOp = 0;
1640    int LastOffset = 0;
1641    unsigned LastOpcode = 0;
1642    unsigned LastBytes = 0;
1643    unsigned NumMove = 0;
1644    for (int i = Ops.size() - 1; i >= 0; --i) {
1645      MachineInstr *Op = Ops[i];
1646      unsigned Loc = MI2LocMap[Op];
1647      if (Loc <= FirstLoc) {
1648        FirstLoc = Loc;
1649        FirstOp = Op;
1650      }
1651      if (Loc >= LastLoc) {
1652        LastLoc = Loc;
1653        LastOp = Op;
1654      }
1655
1656      unsigned LSMOpcode
1657        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1658      if (LastOpcode && LSMOpcode != LastOpcode)
1659        break;
1660
1661      int Offset = getMemoryOpOffset(Op);
1662      unsigned Bytes = getLSMultipleTransferSize(Op);
1663      if (LastBytes) {
1664        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1665          break;
1666      }
1667      LastOffset = Offset;
1668      LastBytes = Bytes;
1669      LastOpcode = LSMOpcode;
1670      if (++NumMove == 8) // FIXME: Tune this limit.
1671        break;
1672    }
1673
1674    if (NumMove <= 1)
1675      Ops.pop_back();
1676    else {
1677      SmallPtrSet<MachineInstr*, 4> MemOps;
1678      SmallSet<unsigned, 4> MemRegs;
1679      for (int i = NumMove-1; i >= 0; --i) {
1680        MemOps.insert(Ops[i]);
1681        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1682      }
1683
1684      // Be conservative, if the instructions are too far apart, don't
1685      // move them. We want to limit the increase of register pressure.
1686      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1687      if (DoMove)
1688        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1689                                           MemOps, MemRegs, TRI);
1690      if (!DoMove) {
1691        for (unsigned i = 0; i != NumMove; ++i)
1692          Ops.pop_back();
1693      } else {
1694        // This is the new location for the loads / stores.
1695        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1696        while (InsertPos != MBB->end()
1697               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1698          ++InsertPos;
1699
1700        // If we are moving a pair of loads / stores, see if it makes sense
1701        // to try to allocate a pair of registers that can form register pairs.
1702        MachineInstr *Op0 = Ops.back();
1703        MachineInstr *Op1 = Ops[Ops.size()-2];
1704        unsigned EvenReg = 0, OddReg = 0;
1705        unsigned BaseReg = 0, PredReg = 0;
1706        ARMCC::CondCodes Pred = ARMCC::AL;
1707        bool isT2 = false;
1708        unsigned NewOpc = 0;
1709        int Offset = 0;
1710        DebugLoc dl;
1711        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1712                                             EvenReg, OddReg, BaseReg,
1713                                             Offset, PredReg, Pred, isT2)) {
1714          Ops.pop_back();
1715          Ops.pop_back();
1716
1717          const MCInstrDesc &MCID = TII->get(NewOpc);
1718          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1719          MRI->constrainRegClass(EvenReg, TRC);
1720          MRI->constrainRegClass(OddReg, TRC);
1721
1722          // Form the pair instruction.
1723          if (isLd) {
1724            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1725              .addReg(EvenReg, RegState::Define)
1726              .addReg(OddReg, RegState::Define)
1727              .addReg(BaseReg);
1728            // FIXME: We're converting from LDRi12 to an insn that still
1729            // uses addrmode2, so we need an explicit offset reg. It should
1730            // always by reg0 since we're transforming LDRi12s.
1731            if (!isT2)
1732              MIB.addReg(0);
1733            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1734            concatenateMemOperands(MIB, Op0, Op1);
1735            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1736            ++NumLDRDFormed;
1737          } else {
1738            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1739              .addReg(EvenReg)
1740              .addReg(OddReg)
1741              .addReg(BaseReg);
1742            // FIXME: We're converting from LDRi12 to an insn that still
1743            // uses addrmode2, so we need an explicit offset reg. It should
1744            // always by reg0 since we're transforming STRi12s.
1745            if (!isT2)
1746              MIB.addReg(0);
1747            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1748            concatenateMemOperands(MIB, Op0, Op1);
1749            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1750            ++NumSTRDFormed;
1751          }
1752          MBB->erase(Op0);
1753          MBB->erase(Op1);
1754
1755          // Add register allocation hints to form register pairs.
1756          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1757          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1758        } else {
1759          for (unsigned i = 0; i != NumMove; ++i) {
1760            MachineInstr *Op = Ops.back();
1761            Ops.pop_back();
1762            MBB->splice(InsertPos, MBB, Op);
1763          }
1764        }
1765
1766        NumLdStMoved += NumMove;
1767        RetVal = true;
1768      }
1769    }
1770  }
1771
1772  return RetVal;
1773}
1774
1775bool
1776ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1777  bool RetVal = false;
1778
1779  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1780  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1781  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1782  SmallVector<unsigned, 4> LdBases;
1783  SmallVector<unsigned, 4> StBases;
1784
1785  unsigned Loc = 0;
1786  MachineBasicBlock::iterator MBBI = MBB->begin();
1787  MachineBasicBlock::iterator E = MBB->end();
1788  while (MBBI != E) {
1789    for (; MBBI != E; ++MBBI) {
1790      MachineInstr *MI = MBBI;
1791      if (MI->isCall() || MI->isTerminator()) {
1792        // Stop at barriers.
1793        ++MBBI;
1794        break;
1795      }
1796
1797      if (!MI->isDebugValue())
1798        MI2LocMap[MI] = ++Loc;
1799
1800      if (!isMemoryOp(MI))
1801        continue;
1802      unsigned PredReg = 0;
1803      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1804        continue;
1805
1806      int Opc = MI->getOpcode();
1807      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1808      unsigned Base = MI->getOperand(1).getReg();
1809      int Offset = getMemoryOpOffset(MI);
1810
1811      bool StopHere = false;
1812      if (isLd) {
1813        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1814          Base2LdsMap.find(Base);
1815        if (BI != Base2LdsMap.end()) {
1816          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1817            if (Offset == getMemoryOpOffset(BI->second[i])) {
1818              StopHere = true;
1819              break;
1820            }
1821          }
1822          if (!StopHere)
1823            BI->second.push_back(MI);
1824        } else {
1825          SmallVector<MachineInstr*, 4> MIs;
1826          MIs.push_back(MI);
1827          Base2LdsMap[Base] = MIs;
1828          LdBases.push_back(Base);
1829        }
1830      } else {
1831        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1832          Base2StsMap.find(Base);
1833        if (BI != Base2StsMap.end()) {
1834          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1835            if (Offset == getMemoryOpOffset(BI->second[i])) {
1836              StopHere = true;
1837              break;
1838            }
1839          }
1840          if (!StopHere)
1841            BI->second.push_back(MI);
1842        } else {
1843          SmallVector<MachineInstr*, 4> MIs;
1844          MIs.push_back(MI);
1845          Base2StsMap[Base] = MIs;
1846          StBases.push_back(Base);
1847        }
1848      }
1849
1850      if (StopHere) {
1851        // Found a duplicate (a base+offset combination that's seen earlier).
1852        // Backtrack.
1853        --Loc;
1854        break;
1855      }
1856    }
1857
1858    // Re-schedule loads.
1859    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1860      unsigned Base = LdBases[i];
1861      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1862      if (Lds.size() > 1)
1863        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1864    }
1865
1866    // Re-schedule stores.
1867    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1868      unsigned Base = StBases[i];
1869      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1870      if (Sts.size() > 1)
1871        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1872    }
1873
1874    if (MBBI != E) {
1875      Base2LdsMap.clear();
1876      Base2StsMap.clear();
1877      LdBases.clear();
1878      StBases.clear();
1879    }
1880  }
1881
1882  return RetVal;
1883}
1884
1885
1886/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1887/// optimization pass.
1888FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1889  if (PreAlloc)
1890    return new ARMPreAllocLoadStoreOpt();
1891  return new ARMLoadStoreOpt();
1892}
1893