ARMLoadStoreOptimizer.cpp revision 958e4e1967838766d327f1112e5b4900be939275
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands to be loaded / stored.
/// It returns true if the transformation is done.
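/// When the starting offset is nonzero, MergeOps first materializes a new
/// base, using the last destination register for a load (or the scratch
/// register for a store). A sketch; registers are illustrative:
///
///   ldr r4, [r0, #8]
///   ldr r5, [r0, #12]
///   ldr r6, [r0, #16]
/// =>
///   add r6, r0, #8
///   ldmia r6, {r4, r5, r6}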
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. when merging
    // more than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
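/// For AM4 (ldm / stm) the register numbers must be ascending, so a sequence
/// that breaks the order is split into multiple merges. A sketch; registers
/// are illustrative (ARM mode):
///
///   ldr r3, [r0]
///   ldr r2, [r0, #4]
///   ldr r5, [r0, #8]
/// =>
///   ldr r3, [r0]
///   ldmib r0, {r2, r5}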
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    //       Can only do up to 16 double-word registers per insn.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset is within the limit (if any).
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset is within the limit (if any).
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  unsigned Offset = 0;

  if (isAM4) {
    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  }

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
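///
/// For example (a sketch; registers are illustrative):
///
/// ldr rd, [rn]
/// rn := rn + 4;
/// =>
/// ldr rd, [rn], #4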
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD_UPD || NewOpc == ARM::VSTMD_UPD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

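/// getMemoryOpOffset - Return the signed byte offset encoded in a load /
/// store's addressing-mode immediate. Note that AM5 offsets are stored in
/// words, so e.g. (illustratively) "vldr d0, [r1, #8]" carries a raw offset
/// field of 2 and yields 2 * 4 = 8 here.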
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

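/// FixInvalidRegPairOp - LDRD / STRD require an even / odd register pair
/// (Rt even, Rt2 == Rt + 1). If the allocated registers do not form such a
/// pair, turn the instruction into an ldm / stm (ascending register numbers,
/// zero offset) or split it into two single-word ops. For example (a sketch;
/// registers are illustrative), "ldrd r2, r4, [r0]" becomes
/// "ldmia r0, {r2, r4}".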
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defined by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
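/// For example (a sketch; registers are illustrative):
///
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
///   ldr r4, [r0, #8]
/// =>
///   ldmia r0, {r2, r3, r4}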
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those loads /
        // stores that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
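///
/// For example (a sketch; registers are illustrative), the pass tries to turn
///
///   ldr r2, [r0]
///   add r4, r4, r4
///   ldr r3, [r0, #4]
/// into
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
///   add r4, r4, r4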

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

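/// CanFormLdStDWord - Check whether two i32 loads / stores of the same base
/// at consecutive offsets can be combined into a single LDRD / STRD, e.g.
/// (a sketch; registers are illustrative):
///
///   ldr r2, [r0]
///   ldr r3, [r0, #4]
/// =>
///   ldrd r2, r3, [r0]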
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

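/// RescheduleOps - Move loads / stores of the same base register next to
/// each other (loads up toward the first one, stores down toward the last
/// one) so the post-allocation pass has a better chance of merging them, and
/// try to pair two adjacent i32 ops into an LDRD / STRD.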
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

1477      // Be conservative, if the instructions are too far apart, don't
1478      // move them. We want to limit the increase of register pressure.
1479      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1480      if (DoMove)
1481        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1482                                           MemOps, MemRegs, TRI);
1483      if (!DoMove) {
1484        for (unsigned i = 0; i != NumMove; ++i)
1485          Ops.pop_back();
1486      } else {
1487        // This is the new location for the loads / stores.
1488        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1489        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
1490          ++InsertPos;
1491
1492        // If we are moving a pair of loads / stores, see if it makes sense
1493        // to try to allocate a pair of registers that can form register pairs.
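        // A sketch, assuming the allocator later honors the pairing hints:
        //   ldr r0, [r2]
        //   ldr r1, [r2, #4]
        // =>
        //   ldrd r0, r1, [r2]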
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
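          // ARM-mode ldrd / strd require an even-numbered first register and
          // the next consecutive (odd) register as the second.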
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

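/// RescheduleLoadStoreInstrs - Scan a basic block, grouping i32 and fp loads
/// and stores by base register, and try to reschedule each group with more
/// than one member.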
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
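      // Only unpredicated memory operations are considered.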
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that was seen
        // earlier). Backtrack.
        --Loc;
        break;
      }
    }

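    // A call / terminator (or a repeated base+offset) ended this region;
    // process what has been collected so far.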
    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

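    // Loads / stores are never moved across the barrier found above; clear
    // the per-base state so the next region starts fresh.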
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// createARMLoadStoreOptimizationPass - Returns an instance of the load /
/// store optimization pass: the pre-register-allocation rescheduling pass if
/// PreAlloc is true, otherwise the post-allocation ldm / stm formation pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}