ARMLoadStoreOptimizer.cpp revision 90c579de5a383cee278acc3f7e7b9d0a656e6a35
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMBaseInstrInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMRegisterInfo.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/Target/TargetData.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetMachine.h"
32#include "llvm/Target/TargetRegisterInfo.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/ADT/DenseMap.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallSet.h"
38#include "llvm/ADT/SmallVector.h"
39#include "llvm/ADT/Statistic.h"
40using namespace llvm;
41
// Pass-wide statistics, printed when LLVM is run with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
53
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
56
namespace {
  /// ARMLoadStoreOpt - Merges adjacent ARM / Thumb2 loads and stores into
  /// load-multiple / store-multiple instructions and folds base-register
  /// updates. Runs after register allocation.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;   // Used to find a scratch register for a new base.
    bool isThumb2;      // True when optimizing Thumb2 (t2*) opcodes.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store gathered while scanning
    /// a basic block.
    struct MemOpQueueEntry {
      int Offset;                        // Immediate offset from the base.
      unsigned Reg;                      // Register loaded / stored.
      bool isKill;                       // True if Reg is killed here.
      unsigned Position;                 // Ordinal position in the block.
      MachineBasicBlock::iterator MBBI;  // The memory instruction itself.
      bool Merged;                       // Set once folded into an ldm/stm.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Emit a single LDM/STM (or VLDM/VSTM) covering Regs; returns true on
    // success.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Wrapper around MergeOps that also maintains kill flags, erases the
    // merged memops, and records the new instruction in Merges.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Partition MemOps (starting at SIndex) into mergeable runs and merge
    // each run.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
130
131static int getLoadStoreMultipleOpcode(int Opcode) {
132  switch (Opcode) {
133  case ARM::LDR:
134    ++NumLDMGened;
135    return ARM::LDM;
136  case ARM::STR:
137    ++NumSTMGened;
138    return ARM::STM;
139  case ARM::t2LDRi8:
140  case ARM::t2LDRi12:
141    ++NumLDMGened;
142    return ARM::t2LDM;
143  case ARM::t2STRi8:
144  case ARM::t2STRi12:
145    ++NumSTMGened;
146    return ARM::t2STM;
147  case ARM::VLDRS:
148    ++NumVLDMGened;
149    return ARM::VLDMS;
150  case ARM::VSTRS:
151    ++NumVSTMGened;
152    return ARM::VSTMS;
153  case ARM::VLDRD:
154    ++NumVLDMGened;
155    return ARM::VLDMD;
156  case ARM::VSTRD:
157    ++NumVSTMGened;
158    return ARM::VSTMD;
159  default: llvm_unreachable("Unhandled opcode!");
160  }
161  return 0;
162}
163
164static bool isT2i32Load(unsigned Opc) {
165  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
166}
167
168static bool isi32Load(unsigned Opc) {
169  return Opc == ARM::LDR || isT2i32Load(Opc);
170}
171
172static bool isT2i32Store(unsigned Opc) {
173  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
174}
175
176static bool isi32Store(unsigned Opc) {
177  return Opc == ARM::STR || isT2i32Store(Opc);
178}
179
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing sub-mode from the starting offset (AM4 = integer
  // ldm/stm):
  //   ia  Offset == 0 (default)
  //   ib  Offset == 4
  //   da  Offset == -4*NumRegs + 4
  //   db  Offset == -4*NumRegs
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    // ADD to reach a positive offset, SUB for a negative one; Thumb2 uses
    // the SP-relative forms when the base is SP.
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    // The offset must be encodable as a (Thumb2) shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  // AM4 (ldm/stm) takes a sub-mode immediate; AM5 (vldm/vstm) encodes the
  // sub-mode plus the transfer count (doubled for D registers, which are
  // counted in words).
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  // Append the transferred registers (defs for loads, uses for stores).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
267
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  // Classify registers killed by memops that sit before the insertion point
  // but are outside the [memOpsBegin, memOpsEnd) range being merged:
  //  - UnavailRegs: the killing memop was already merged away, so its kill
  //    marker can no longer be cleared — such a register blocks this merge.
  //  - KilledRegs / Killer: the killing memop is still around; its kill flag
  //    can be cleared and moved onto the merged instruction.
  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  // Memops after the merged range may also precede the insertion point.
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  // Build the (register, isKill) list for the merged instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge. The merged instruction goes right after the memop
  // at insertAfter.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
      }
    }
    // Erase the original single load / store and mark the entry merged.
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
342
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;   // Running offset of the current run.
  int SOffset = Offset;                 // Starting offset of the run.
  unsigned insertAfter = SIndex;        // Latest-positioned memop seen so far.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // UINT_MAX for an undef register makes it compare unmergeable below.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;

  // Extend the run while offsets stay consecutive and register numbers obey
  // the addressing-mode ordering rules.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    //       Can only do up to 16 double-word registers per insn.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Recurse to handle the rest of the queue starting at this entry.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // The merged instruction must be inserted after the latest memop in the
    // run, so track the entry with the greatest block position.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole remaining queue formed one run; merge it. The base is killed
  // if the first memop in the run kills it.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
397
398static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
399                                       unsigned Bytes, unsigned Limit,
400                                       ARMCC::CondCodes Pred, unsigned PredReg){
401  unsigned MyPredReg = 0;
402  if (!MI)
403    return false;
404  if (MI->getOpcode() != ARM::t2SUBri &&
405      MI->getOpcode() != ARM::t2SUBrSPi &&
406      MI->getOpcode() != ARM::t2SUBrSPi12 &&
407      MI->getOpcode() != ARM::tSUBspi &&
408      MI->getOpcode() != ARM::SUBri)
409    return false;
410
411  // Make sure the offset fits in 8 bits.
412  if (Bytes <= 0 || (Limit && Bytes >= Limit))
413    return false;
414
415  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
416  return (MI->getOperand(0).getReg() == Base &&
417          MI->getOperand(1).getReg() == Base &&
418          (MI->getOperand(2).getImm()*Scale) == Bytes &&
419          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
420          MyPredReg == PredReg);
421}
422
423static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
424                                       unsigned Bytes, unsigned Limit,
425                                       ARMCC::CondCodes Pred, unsigned PredReg){
426  unsigned MyPredReg = 0;
427  if (!MI)
428    return false;
429  if (MI->getOpcode() != ARM::t2ADDri &&
430      MI->getOpcode() != ARM::t2ADDrSPi &&
431      MI->getOpcode() != ARM::t2ADDrSPi12 &&
432      MI->getOpcode() != ARM::tADDspi &&
433      MI->getOpcode() != ARM::ADDri)
434    return false;
435
436  if (Bytes <= 0 || (Limit && Bytes >= Limit))
437    // Make sure the offset fits in 8 bits.
438    return false;
439
440  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
441  return (MI->getOperand(0).getReg() == Base &&
442          MI->getOperand(1).getReg() == Base &&
443          (MI->getOperand(2).getImm()*Scale) == Bytes &&
444          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
445          MyPredReg == PredReg);
446}
447
448static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
449  switch (MI->getOpcode()) {
450  default: return 0;
451  case ARM::LDR:
452  case ARM::STR:
453  case ARM::t2LDRi8:
454  case ARM::t2LDRi12:
455  case ARM::t2STRi8:
456  case ARM::t2STRi12:
457  case ARM::VLDRS:
458  case ARM::VSTRS:
459    return 4;
460  case ARM::VLDRD:
461  case ARM::VSTRD:
462    return 8;
463  case ARM::LDM:
464  case ARM::STM:
465  case ARM::t2LDM:
466  case ARM::t2STM:
467    return (MI->getNumOperands() - 4) * 4;
468  case ARM::VLDMS:
469  case ARM::VSTMS:
470  case ARM::VLDMD:
471  case ARM::VSTMD:
472    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
473  }
474}
475
/// getUpdatingLSMultipleOpcode - Map a load / store multiple opcode to its
/// base-writeback (_UPD) form. Asserts on any other opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
490
491/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
492/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
493///
494/// stmia rn, <ra, rb, rc>
495/// rn := rn + 4 * 3;
496/// =>
497/// stmia rn!, <ra, rb, rc>
498///
499/// rn := rn - 4 * 3;
500/// ldmia rn, <ra, rb, rc>
501/// =>
502/// ldmdb rn!, <ra, rb, rc>
503bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
504                                               MachineBasicBlock::iterator MBBI,
505                                               bool &Advance,
506                                               MachineBasicBlock::iterator &I) {
507  MachineInstr *MI = MBBI;
508  unsigned Base = MI->getOperand(0).getReg();
509  bool BaseKill = MI->getOperand(0).isKill();
510  unsigned Bytes = getLSMultipleTransferSize(MI);
511  unsigned PredReg = 0;
512  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
513  int Opcode = MI->getOpcode();
514  DebugLoc dl = MI->getDebugLoc();
515  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
516                Opcode == ARM::STM || Opcode == ARM::t2STM);
517
518  bool DoMerge = false;
519  ARM_AM::AMSubMode Mode = ARM_AM::ia;
520  unsigned Offset = 0;
521
522  if (isAM4) {
523    // Can't use an updating ld/st if the base register is also a dest
524    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
525    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
526      if (MI->getOperand(i).getReg() == Base)
527        return false;
528    }
529    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
530  } else {
531    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
532    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
533    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
534  }
535
536  // Try merging with the previous instruction.
537  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
538  if (MBBI != BeginMBBI) {
539    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
540    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
541      --PrevMBBI;
542    if (isAM4) {
543      if (Mode == ARM_AM::ia &&
544          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
545        DoMerge = true;
546        Mode = ARM_AM::db;
547      } else if (isAM4 && Mode == ARM_AM::ib &&
548                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
549        DoMerge = true;
550        Mode = ARM_AM::da;
551      }
552    } else {
553      if (Mode == ARM_AM::ia &&
554          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
555        Mode = ARM_AM::db;
556        DoMerge = true;
557      }
558    }
559    if (DoMerge)
560      MBB.erase(PrevMBBI);
561  }
562
563  // Try merging with the next instruction.
564  MachineBasicBlock::iterator EndMBBI = MBB.end();
565  if (!DoMerge && MBBI != EndMBBI) {
566    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
567    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
568      ++NextMBBI;
569    if (isAM4) {
570      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
571          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
572        DoMerge = true;
573      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
574                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
575        DoMerge = true;
576      }
577    } else {
578      if (Mode == ARM_AM::ia &&
579          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
580        DoMerge = true;
581      }
582    }
583    if (DoMerge) {
584      if (NextMBBI == I) {
585        Advance = true;
586        ++I;
587      }
588      MBB.erase(NextMBBI);
589    }
590  }
591
592  if (!DoMerge)
593    return false;
594
595  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
596  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
597    .addReg(Base, getDefRegState(true)) // WB base register
598    .addReg(Base, getKillRegState(BaseKill));
599  if (isAM4) {
600    // [t2]LDM_UPD, [t2]STM_UPD
601    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
602      .addImm(Pred).addReg(PredReg);
603  } else {
604    // VLDM[SD}_UPD, VSTM[SD]_UPD
605    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
606      .addImm(Pred).addReg(PredReg);
607  }
608  // Transfer the rest of operands.
609  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
610    MIB.addOperand(MI->getOperand(OpNum));
611  // Transfer memoperands.
612  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
613
614  MBB.erase(MBBI);
615  return true;
616}
617
/// getPreIndexedLoadStoreOpcode - Map a single load / store opcode to its
/// pre-indexed form. VFP single loads / stores map to the writeback
/// multiple (_UPD) opcodes. Asserts on any other opcode.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
636
/// getPostIndexedLoadStoreOpcode - Map a single load / store opcode to its
/// post-indexed form. VFP single loads / stores map to the writeback
/// multiple (_UPD) opcodes. Asserts on any other opcode.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}
655
656/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
657/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
658bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
659                                               MachineBasicBlock::iterator MBBI,
660                                               const TargetInstrInfo *TII,
661                                               bool &Advance,
662                                               MachineBasicBlock::iterator &I) {
663  MachineInstr *MI = MBBI;
664  unsigned Base = MI->getOperand(1).getReg();
665  bool BaseKill = MI->getOperand(1).isKill();
666  unsigned Bytes = getLSMultipleTransferSize(MI);
667  int Opcode = MI->getOpcode();
668  DebugLoc dl = MI->getDebugLoc();
669  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
670                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
671  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
672  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
673    return false;
674  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
675    return false;
676  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
677    if (MI->getOperand(2).getImm() != 0)
678      return false;
679
680  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
681  // Can't do the merge if the destination register is the same as the would-be
682  // writeback register.
683  if (isLd && MI->getOperand(0).getReg() == Base)
684    return false;
685
686  unsigned PredReg = 0;
687  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
688  bool DoMerge = false;
689  ARM_AM::AddrOpc AddSub = ARM_AM::add;
690  unsigned NewOpc = 0;
691  // AM2 - 12 bits, thumb2 - 8 bits.
692  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
693
694  // Try merging with the previous instruction.
695  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
696  if (MBBI != BeginMBBI) {
697    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
698    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
699      --PrevMBBI;
700    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
701      DoMerge = true;
702      AddSub = ARM_AM::sub;
703    } else if (!isAM5 &&
704               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
705      DoMerge = true;
706    }
707    if (DoMerge) {
708      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
709      MBB.erase(PrevMBBI);
710    }
711  }
712
713  // Try merging with the next instruction.
714  MachineBasicBlock::iterator EndMBBI = MBB.end();
715  if (!DoMerge && MBBI != EndMBBI) {
716    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
717    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
718      ++NextMBBI;
719    if (!isAM5 &&
720        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
721      DoMerge = true;
722      AddSub = ARM_AM::sub;
723    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
724      DoMerge = true;
725    }
726    if (DoMerge) {
727      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
728      if (NextMBBI == I) {
729        Advance = true;
730        ++I;
731      }
732      MBB.erase(NextMBBI);
733    }
734  }
735
736  if (!DoMerge)
737    return false;
738
739  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
740  unsigned Offset = 0;
741  if (isAM5)
742    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
743                               (isDPR ? 2 : 1));
744  else if (isAM2)
745    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
746  else
747    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
748
749  if (isAM5) {
750    // VLDM[SD}_UPD, VSTM[SD]_UPD
751    MachineOperand &MO = MI->getOperand(0);
752    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
753      .addReg(Base, getDefRegState(true)) // WB base register
754      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
755      .addImm(Offset)
756      .addImm(Pred).addReg(PredReg)
757      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
758                            getKillRegState(MO.isKill())));
759  } else if (isLd) {
760    if (isAM2)
761      // LDR_PRE, LDR_POST,
762      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
763        .addReg(Base, RegState::Define)
764        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
765    else
766      // t2LDR_PRE, t2LDR_POST
767      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
768        .addReg(Base, RegState::Define)
769        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
770  } else {
771    MachineOperand &MO = MI->getOperand(0);
772    if (isAM2)
773      // STR_PRE, STR_POST
774      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
775        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
776        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
777    else
778      // t2STR_PRE, t2STR_POST
779      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
780        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
781        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
782  }
783  MBB.erase(MBBI);
784
785  return true;
786}
787
788/// isMemoryOp - Returns true if instruction is a memory operations (that this
789/// pass is capable of operating on).
790static bool isMemoryOp(const MachineInstr *MI) {
791  // When no memory operands are present, conservatively assume unaligned,
792  // volatile, unfoldable.
793  if (!MI->hasOneMemOperand())
794    return false;
795
796  const MachineMemOperand *MMO = *MI->memoperands_begin();
797
798  // Don't touch volatile memory accesses - we may be changing their order.
799  if (MMO->isVolatile())
800    return false;
801
802  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
803  // not.
804  if (MMO->getAlignment() < 4)
805    return false;
806
807  // str <undef> could probably be eliminated entirely, but for now we just want
808  // to avoid making a mess of it.
809  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
810  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
811      MI->getOperand(0).isUndef())
812    return false;
813
814  // Likewise don't mess with references to undefined addresses.
815  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
816      MI->getOperand(1).isUndef())
817    return false;
818
819  int Opcode = MI->getOpcode();
820  switch (Opcode) {
821  default: break;
822  case ARM::LDR:
823  case ARM::STR:
824    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
825  case ARM::VLDRS:
826  case ARM::VSTRS:
827    return MI->getOperand(1).isReg();
828  case ARM::VLDRD:
829  case ARM::VSTRD:
830    return MI->getOperand(1).isReg();
831  case ARM::t2LDRi8:
832  case ARM::t2LDRi12:
833  case ARM::t2STRi8:
834  case ARM::t2STRi12:
835    return MI->getOperand(1).isReg();
836  }
837  return false;
838}
839
840/// AdvanceRS - Advance register scavenger to just before the earliest memory
841/// op that is being merged.
842void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
843  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
844  unsigned Position = MemOps[0].Position;
845  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
846    if (MemOps[i].Position < Position) {
847      Position = MemOps[i].Position;
848      Loc = MemOps[i].MBBI;
849    }
850  }
851
852  if (Loc != MBB.begin())
853    RS->forward(prior(Loc));
854}
855
856static int getMemoryOpOffset(const MachineInstr *MI) {
857  int Opcode = MI->getOpcode();
858  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
859  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
860  unsigned NumOperands = MI->getDesc().getNumOperands();
861  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
862
863  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
864      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
865      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
866    return OffField;
867
868  int Offset = isAM2
869    ? ARM_AM::getAM2Offset(OffField)
870    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
871             : ARM_AM::getAM5Offset(OffField) * 4);
872  if (isAM2) {
873    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
874      Offset = -Offset;
875  } else if (isAM3) {
876    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
877      Offset = -Offset;
878  } else {
879    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
880      Offset = -Offset;
881  }
882  return Offset;
883}
884
885static void InsertLDR_STR(MachineBasicBlock &MBB,
886                          MachineBasicBlock::iterator &MBBI,
887                          int OffImm, bool isDef,
888                          DebugLoc dl, unsigned NewOpc,
889                          unsigned Reg, bool RegDeadKill, bool RegUndef,
890                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
891                          unsigned OffReg, bool OffKill, bool OffUndef,
892                          ARMCC::CondCodes Pred, unsigned PredReg,
893                          const TargetInstrInfo *TII, bool isT2) {
894  int Offset = OffImm;
895  if (!isT2) {
896    if (OffImm < 0)
897      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
898    else
899      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
900  }
901  if (isDef) {
902    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
903                                      TII->get(NewOpc))
904      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
905      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
906    if (!isT2)
907      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
908    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
909  } else {
910    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
911                                      TII->get(NewOpc))
912      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
913      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
914    if (!isT2)
915      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
916    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
917  }
918}
919
/// FixInvalidRegPairOp - LDRD / STRD require an even / odd register pair
/// (rN, rN+1). If MI is an LDRD / STRD whose data registers do not form such
/// a pair, replace it with either a single LDM / STM (when the register
/// numbers ascend and there is no offset) or with two single-word loads /
/// stores. Returns true if MI was replaced; on return MBBI points at the
/// first replacement instruction so the caller rescans from there.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    // Dwarf register numbers give a consecutive numbering of the GPRs, which
    // turns the even/odd pairing check into simple arithmetic.
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // A legal even/odd pair needs no fixing.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the pair registers are defs (possibly dead); for stores they
    // are uses (possibly killed).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 LDRD / STRD have no offset register operand.
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        // EvenReg aliases the base / offset register, so emit the odd half
        // first (its address still uses the intact base), then the even half.
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original LDRD / STRD and hand the caller the position of the
    // first replacement instruction.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1034
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // Characteristics of the chain currently being accumulated: base register,
  // opcode, access size and predicate must all match for an op to be queued.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Expand illegal LDRD / STRD register pairs first; the replacement loads
    // / stores are then revisited as merge candidates.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      // Only store sources carry a meaningful kill flag here; load results
      // are defs.
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Out-of-order offset: insert at the right place so the queue
            // stays sorted by increasing offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values are skipped without consuming a Position and never end
      // a chain.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state before scanning for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1197
1198namespace {
1199  struct OffsetCompare {
1200    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1201      int LOffset = getMemoryOpOffset(LHS);
1202      int ROffset = getMemoryOpOffset(RHS);
1203      assert(LHS == RHS || LOffset != ROffset);
1204      return LOffset > ROffset;
1205    }
1206  };
1207}
1208
1209/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1210/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1211/// directly restore the value of LR into pc.
1212///   ldmfd sp!, {..., lr}
1213///   bx lr
1214/// or
1215///   ldmfd sp!, {..., lr}
1216///   mov pc, lr
1217/// =>
1218///   ldmfd sp!, {..., pc}
1219bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1220  if (MBB.empty()) return false;
1221
1222  MachineBasicBlock::iterator MBBI = prior(MBB.end());
1223  if (MBBI != MBB.begin() &&
1224      (MBBI->getOpcode() == ARM::BX_RET ||
1225       MBBI->getOpcode() == ARM::tBX_RET ||
1226       MBBI->getOpcode() == ARM::MOVPCLR)) {
1227    MachineInstr *PrevMI = prior(MBBI);
1228    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
1229        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
1230      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1231      if (MO.getReg() != ARM::LR)
1232        return false;
1233      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1234      PrevMI->setDesc(TII->get(NewOpc));
1235      MO.setReg(ARM::PC);
1236      MBB.erase(MBBI);
1237      return true;
1238    }
1239  }
1240  return false;
1241}
1242
1243bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1244  const TargetMachine &TM = Fn.getTarget();
1245  AFI = Fn.getInfo<ARMFunctionInfo>();
1246  TII = TM.getInstrInfo();
1247  TRI = TM.getRegisterInfo();
1248  RS = new RegScavenger();
1249  isThumb2 = AFI->isThumb2Function();
1250
1251  bool Modified = false;
1252  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1253       ++MFI) {
1254    MachineBasicBlock &MBB = *MFI;
1255    Modified |= LoadStoreMultipleOpti(MBB);
1256    Modified |= MergeReturnIntoLDM(MBB);
1257  }
1258
1259  delete RS;
1260  return Modified;
1261}
1262
1263
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Target hooks, cached once per function in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Test whether two word loads / stores can be merged
    // into a single LDRD / STRD; on success fills in the merged op's operands.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move a set of same-base loads / stores next to each
    // other, possibly forming LDRD / STRD pairs.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver: bucket memory ops by base
    // register and invoke RescheduleOps on each bucket.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1301
1302bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1303  TD  = Fn.getTarget().getTargetData();
1304  TII = Fn.getTarget().getInstrInfo();
1305  TRI = Fn.getTarget().getRegisterInfo();
1306  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1307  MRI = &Fn.getRegInfo();
1308  MF  = &Fn;
1309
1310  bool Modified = false;
1311  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1312       ++MFI)
1313    Modified |= RescheduleLoadStoreInstrs(MFI);
1314
1315  return Modified;
1316}
1317
1318static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1319                                      MachineBasicBlock::iterator I,
1320                                      MachineBasicBlock::iterator E,
1321                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1322                                      SmallSet<unsigned, 4> &MemRegs,
1323                                      const TargetRegisterInfo *TRI) {
1324  // Are there stores / loads / calls between them?
1325  // FIXME: This is overly conservative. We should make use of alias information
1326  // some day.
1327  SmallSet<unsigned, 4> AddedRegPressure;
1328  while (++I != E) {
1329    if (I->isDebugValue() || MemOps.count(&*I))
1330      continue;
1331    const TargetInstrDesc &TID = I->getDesc();
1332    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1333      return false;
1334    if (isLd && TID.mayStore())
1335      return false;
1336    if (!isLd) {
1337      if (TID.mayLoad())
1338        return false;
1339      // It's not safe to move the first 'str' down.
1340      // str r1, [r0]
1341      // strh r5, [r0]
1342      // str r4, [r0, #+4]
1343      if (TID.mayStore())
1344        return false;
1345    }
1346    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1347      MachineOperand &MO = I->getOperand(j);
1348      if (!MO.isReg())
1349        continue;
1350      unsigned Reg = MO.getReg();
1351      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1352        return false;
1353      if (Reg != Base && !MemRegs.count(Reg))
1354        AddedRegPressure.insert(Reg);
1355    }
1356  }
1357
1358  // Estimate register pressure increase due to the transformation.
1359  if (MemRegs.size() <= 4)
1360    // Ok if we are moving small number of instructions.
1361    return true;
1362  return AddedRegPressure.size() <= MemRegs.size() * 2;
1363}
1364
/// CanFormLdStDWord - Return true if the two given word loads / stores (Op0
/// at the lower address, Op1 at the higher) can be merged into a single
/// LDRD / STRD. On success the out parameters are filled in with everything
/// needed to build the paired instruction: opcode, data registers, base /
/// offset registers, encoded offset, predicate, and Thumb2-ness.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
      return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    // ARM mode: re-encode the magnitude and add/sub flag into the
    // addressing-mode-3 immediate used by LDRD / STRD.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  // LDRD / STRD need two distinct data registers.
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1450
/// RescheduleOps - Given the loads / stores in Ops that all use base register
/// Base, move runs of consecutive-offset, same-opcode accesses next to each
/// other (forming an LDRD / STRD directly when a run is exactly a pair).
/// Ops is consumed in the process. Returns true if anything was moved or
/// merged.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by decreasing offset, so this backwards walk visits
    // increasing offsets; it gathers a maximal run of same-opcode accesses to
    // strictly consecutive addresses, tracking the run's program-order span
    // via FirstLoc/LastLoc.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        // A gap or a size change ends the run.
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Give up on this run; drop its ops from consideration.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              // ARM-mode LDRD also takes an offset register operand.
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // Not forming a pair: just splice each op to the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
1589
/// RescheduleLoadStoreInstrs - Scan the block region by region (regions are
/// bounded by calls and terminators), bucketing unpredicated loads and stores
/// by base register, then try to reschedule each bucket so mergeable accesses
/// end up adjacent.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Number instructions in program order; RescheduleOps uses these
      // locations to bound how far apart rescheduled ops may be.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unpredicated memory ops are rescheduled.
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A repeated base+offset would violate the distinct-offset
          // assumption the rescheduling relies on (see OffsetCompare's
          // assert), so it ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          // First load seen for this base: start a new bucket.
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // Same duplicate base+offset check as for loads.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Clear per-region state before processing instructions beyond the
    // barrier (if any remain).
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
1700
1701
1702/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1703/// optimization pass.
1704FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1705  if (PreAlloc)
1706    return new ARMPreAllocLoadStoreOpt();
1707  return new ARMLoadStoreOpt();
1708}
1709