ARMLoadStoreOptimizer.cpp revision 2d357f6b44159c59dbb58e03a22f94312696d064
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMBaseInstrInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMRegisterInfo.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/Target/TargetData.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetMachine.h"
32#include "llvm/Target/TargetRegisterInfo.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/ADT/DenseMap.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallSet.h"
38#include "llvm/ADT/SmallVector.h"
39#include "llvm/ADT/Statistic.h"
40using namespace llvm;
41
// Statistics reported by this pass (visible with -stats). The *Gened
// counters are bumped when a merge opcode is selected in
// getLoadStoreMultipleOpcode below; the others are maintained elsewhere in
// this file.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
53
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
56
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    // Per-function state, (re)initialized in runOnMachineFunction.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;   // Used to find a scratch register for a new base.
    bool isThumb2;      // True when the current function is Thumb2 code.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store: its immediate offset,
    /// its ordinal position within the basic block, and an iterator to the
    /// instruction itself. Merged records whether the op has been folded
    /// into a load / store multiple.
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // MergeOps - Emit a single ldm/stm covering Regs; see definition below.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // MergeOpsUpdate - Wrapper around MergeOps that also maintains the
    // MemOps queue and the Merges list on success.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // MergeLDR_STR - Partition MemOps[SIndex..] into mergeable runs.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
127
/// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
/// corresponding load / store multiple opcode. NOTE(review): this also bumps
/// the "generated" statistic for the chosen family as a side effect, so it
/// must only be called when the multiple instruction is actually emitted.
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  // Not reached; pacifies compilers that don't know llvm_unreachable.
  return 0;
}
160
161static bool isT2i32Load(unsigned Opc) {
162  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
163}
164
165static bool isi32Load(unsigned Opc) {
166  return Opc == ARM::LDR || isT2i32Load(Opc);
167}
168
169static bool isT2i32Store(unsigned Opc) {
170  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
171}
172
173static bool isi32Store(unsigned Opc) {
174  return Opc == ARM::STR || isT2i32Store(Opc);
175}
176
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Select the addressing submode from the starting offset:
  //   0              -> ia (increment after)
  //   +4             -> ib (increment before; ARM mode only)
  //   -4*NumRegs + 4 -> da (decrement after; ARM mode only)
  //   -4*NumRegs     -> db (decrement before)
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // AM4 is the integer ldm/stm family; otherwise this is a VFP vldm/vstm
  // (addressing mode 5).
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    // The adjustment must be encodable as an (ARM or Thumb2) modified
    // immediate; getSOImmVal / getT2SOImmVal return -1 when it is not.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is killed right at its only use below.
  }

  // VLDRD / VSTRD move 64-bit D registers; AM5 counts the transfer in
  // 32-bit words, hence NumRegs is doubled for the DPR case below.
  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  // Append the transfer registers: defs for loads, with the precomputed
  // kill state either way.
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
264
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success. [memOpsBegin, memOpsEnd) is the half-open range of queue entries
// to merge; insertAfter indexes the entry after which the merged instruction
// is inserted.
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position<insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge. The merged instruction goes immediately after the
  // insertAfter entry.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  Loc++;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos:
    // the register is now killed later, by the merged instruction instead.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position<insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    // Erase the merged-away instruction but keep its queue entry, flagged.
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
325
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions. Starting at MemOps[SIndex], greedily
/// extend a run whose offsets ascend by Size and whose register numbers
/// ascend (AM4) or are consecutive (AM5); merge the run via MergeOpsUpdate
/// and recurse on whatever did not fit.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // An undef operand has no meaningful register number; UINT_MAX sorts it
  // after everything so no later op can extend the run through it.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // Track the latest position so the merged op is inserted after every op
    // it replaces.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole tail merged. The base can be marked killed if the first op in
  // the run already killed it.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
376
377static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
378                                       unsigned Bytes, unsigned Limit,
379                                       ARMCC::CondCodes Pred, unsigned PredReg){
380  unsigned MyPredReg = 0;
381  if (!MI)
382    return false;
383  if (MI->getOpcode() != ARM::t2SUBri &&
384      MI->getOpcode() != ARM::t2SUBrSPi &&
385      MI->getOpcode() != ARM::t2SUBrSPi12 &&
386      MI->getOpcode() != ARM::tSUBspi &&
387      MI->getOpcode() != ARM::SUBri)
388    return false;
389
390  // Make sure the offset fits in 8 bits.
391  if (Bytes <= 0 || (Limit && Bytes >= Limit))
392    return false;
393
394  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
395  return (MI->getOperand(0).getReg() == Base &&
396          MI->getOperand(1).getReg() == Base &&
397          (MI->getOperand(2).getImm()*Scale) == Bytes &&
398          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
399          MyPredReg == PredReg);
400}
401
402static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
403                                       unsigned Bytes, unsigned Limit,
404                                       ARMCC::CondCodes Pred, unsigned PredReg){
405  unsigned MyPredReg = 0;
406  if (!MI)
407    return false;
408  if (MI->getOpcode() != ARM::t2ADDri &&
409      MI->getOpcode() != ARM::t2ADDrSPi &&
410      MI->getOpcode() != ARM::t2ADDrSPi12 &&
411      MI->getOpcode() != ARM::tADDspi &&
412      MI->getOpcode() != ARM::ADDri)
413    return false;
414
415  if (Bytes <= 0 || (Limit && Bytes >= Limit))
416    // Make sure the offset fits in 8 bits.
417    return false;
418
419  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
420  return (MI->getOperand(0).getReg() == Base &&
421          MI->getOperand(1).getReg() == Base &&
422          (MI->getOperand(2).getImm()*Scale) == Bytes &&
423          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
424          MyPredReg == PredReg);
425}
426
/// getLSMultipleTransferSize - Return the number of bytes of memory the given
/// instruction transfers, or 0 for unhandled opcodes. For the multiple forms
/// the size is derived from the operand count (AM4: four fixed operands —
/// base, mode, predicate, predicate register — precede the register list) or
/// from the AM5 offset field, which counts 32-bit words.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}
454
/// getUpdatingLSMultipleOpcode - Map a load / store multiple opcode to its
/// base-writeback (_UPD) variant.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  // Not reached; pacifies compilers that don't know llvm_unreachable.
  return 0;
}
469
470/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
471/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
472///
473/// stmia rn, <ra, rb, rc>
474/// rn := rn + 4 * 3;
475/// =>
476/// stmia rn!, <ra, rb, rc>
477///
478/// rn := rn - 4 * 3;
479/// ldmia rn, <ra, rb, rc>
480/// =>
481/// ldmdb rn!, <ra, rb, rc>
482bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
483                                               MachineBasicBlock::iterator MBBI,
484                                               bool &Advance,
485                                               MachineBasicBlock::iterator &I) {
486  MachineInstr *MI = MBBI;
487  unsigned Base = MI->getOperand(0).getReg();
488  bool BaseKill = MI->getOperand(0).isKill();
489  unsigned Bytes = getLSMultipleTransferSize(MI);
490  unsigned PredReg = 0;
491  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
492  int Opcode = MI->getOpcode();
493  DebugLoc dl = MI->getDebugLoc();
494  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
495                Opcode == ARM::STM || Opcode == ARM::t2STM);
496
497  bool DoMerge = false;
498  ARM_AM::AMSubMode Mode = ARM_AM::ia;
499  unsigned Offset = 0;
500
501  if (isAM4) {
502    // Can't use an updating ld/st if the base register is also a dest
503    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
504    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
505      if (MI->getOperand(i).getReg() == Base)
506        return false;
507    }
508    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
509  } else {
510    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
511    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
512    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
513  }
514
515  // Try merging with the previous instruction.
516  if (MBBI != MBB.begin()) {
517    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
518    if (isAM4) {
519      if (Mode == ARM_AM::ia &&
520          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
521        DoMerge = true;
522        Mode = ARM_AM::db;
523      } else if (isAM4 && Mode == ARM_AM::ib &&
524                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
525        DoMerge = true;
526        Mode = ARM_AM::da;
527      }
528    } else {
529      if (Mode == ARM_AM::ia &&
530          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
531        Mode = ARM_AM::db;
532        DoMerge = true;
533      }
534    }
535    if (DoMerge)
536      MBB.erase(PrevMBBI);
537  }
538
539  // Try merging with the next instruction.
540  if (!DoMerge && MBBI != MBB.end()) {
541    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
542    if (isAM4) {
543      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
544          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
545        DoMerge = true;
546      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
547                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
548        DoMerge = true;
549      }
550    } else {
551      if (Mode == ARM_AM::ia &&
552          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
553        DoMerge = true;
554      }
555    }
556    if (DoMerge) {
557      if (NextMBBI == I) {
558        Advance = true;
559        ++I;
560      }
561      MBB.erase(NextMBBI);
562    }
563  }
564
565  if (!DoMerge)
566    return false;
567
568  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
569  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
570    .addReg(Base, getDefRegState(true)) // WB base register
571    .addReg(Base, getKillRegState(BaseKill));
572  if (isAM4) {
573    // [t2]LDM_UPD, [t2]STM_UPD
574    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
575      .addImm(Pred).addReg(PredReg);
576  } else {
577    // VLDM[SD}_UPD, VSTM[SD]_UPD
578    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
579      .addImm(Pred).addReg(PredReg);
580  }
581  // Transfer the rest of operands.
582  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
583    MIB.addOperand(MI->getOperand(OpNum));
584  // Transfer memoperands.
585  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
586
587  MBB.erase(MBBI);
588  return true;
589}
590
/// getPreIndexedLoadStoreOpcode - Map a single load / store opcode to its
/// pre-indexed form. VFP has no pre-indexed single loads / stores, so
/// VLDR / VSTR map to the one-register writeback VLDM / VSTM variants.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  // Not reached; pacifies compilers that don't know llvm_unreachable.
  return 0;
}
609
/// getPostIndexedLoadStoreOpcode - Map a single load / store opcode to its
/// post-indexed form. VFP has no post-indexed single loads / stores, so
/// VLDR / VSTR map to the one-register writeback VLDM / VSTM variants.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  // Not reached; pacifies compilers that don't know llvm_unreachable.
  return 0;
}
628
629/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
630/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
631bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
632                                               MachineBasicBlock::iterator MBBI,
633                                               const TargetInstrInfo *TII,
634                                               bool &Advance,
635                                               MachineBasicBlock::iterator &I) {
636  MachineInstr *MI = MBBI;
637  unsigned Base = MI->getOperand(1).getReg();
638  bool BaseKill = MI->getOperand(1).isKill();
639  unsigned Bytes = getLSMultipleTransferSize(MI);
640  int Opcode = MI->getOpcode();
641  DebugLoc dl = MI->getDebugLoc();
642  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
643                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
644  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
645  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
646    return false;
647  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
648    return false;
649  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
650    if (MI->getOperand(2).getImm() != 0)
651      return false;
652
653  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
654  // Can't do the merge if the destination register is the same as the would-be
655  // writeback register.
656  if (isLd && MI->getOperand(0).getReg() == Base)
657    return false;
658
659  unsigned PredReg = 0;
660  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
661  bool DoMerge = false;
662  ARM_AM::AddrOpc AddSub = ARM_AM::add;
663  unsigned NewOpc = 0;
664  // AM2 - 12 bits, thumb2 - 8 bits.
665  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
666
667  // Try merging with the previous instruction.
668  if (MBBI != MBB.begin()) {
669    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
670    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
671      DoMerge = true;
672      AddSub = ARM_AM::sub;
673    } else if (!isAM5 &&
674               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
675      DoMerge = true;
676    }
677    if (DoMerge) {
678      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
679      MBB.erase(PrevMBBI);
680    }
681  }
682
683  // Try merging with the next instruction.
684  if (!DoMerge && MBBI != MBB.end()) {
685    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
686    if (!isAM5 &&
687        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
688      DoMerge = true;
689      AddSub = ARM_AM::sub;
690    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
691      DoMerge = true;
692    }
693    if (DoMerge) {
694      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
695      if (NextMBBI == I) {
696        Advance = true;
697        ++I;
698      }
699      MBB.erase(NextMBBI);
700    }
701  }
702
703  if (!DoMerge)
704    return false;
705
706  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
707  unsigned Offset = 0;
708  if (isAM5)
709    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
710                               (isDPR ? 2 : 1));
711  else if (isAM2)
712    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
713  else
714    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
715
716  if (isAM5) {
717    // VLDM[SD}_UPD, VSTM[SD]_UPD
718    MachineOperand &MO = MI->getOperand(0);
719    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
720      .addReg(Base, getDefRegState(true)) // WB base register
721      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
722      .addImm(Offset)
723      .addImm(Pred).addReg(PredReg)
724      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
725                            getKillRegState(MO.isKill())));
726  } else if (isLd) {
727    if (isAM2)
728      // LDR_PRE, LDR_POST,
729      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
730        .addReg(Base, RegState::Define)
731        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
732    else
733      // t2LDR_PRE, t2LDR_POST
734      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
735        .addReg(Base, RegState::Define)
736        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
737  } else {
738    MachineOperand &MO = MI->getOperand(0);
739    if (isAM2)
740      // STR_PRE, STR_POST
741      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
742        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
743        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
744    else
745      // t2STR_PRE, t2STR_POST
746      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
747        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
748        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
749  }
750  MBB.erase(MBBI);
751
752  return true;
753}
754
/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    // Only handle the form with no offset register (operand 2 == reg 0).
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}
803
804/// AdvanceRS - Advance register scavenger to just before the earliest memory
805/// op that is being merged.
806void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
807  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
808  unsigned Position = MemOps[0].Position;
809  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
810    if (MemOps[i].Position < Position) {
811      Position = MemOps[i].Position;
812      Loc = MemOps[i].MBBI;
813    }
814  }
815
816  if (Loc != MBB.begin())
817    RS->forward(prior(Loc));
818}
819
/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate of a supported load / store. Thumb2 i8/i12
/// forms hold the offset directly; AM2 / AM3 / AM5 forms encode a magnitude
/// plus an add/sub flag (AM5 counts 32-bit words, hence the * 4).
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  // The offset immediate is the third-from-last operand, ahead of the two
  // trailing predicate operands.
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  // Apply the encoded direction bit.
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}
848
849static void InsertLDR_STR(MachineBasicBlock &MBB,
850                          MachineBasicBlock::iterator &MBBI,
851                          int OffImm, bool isDef,
852                          DebugLoc dl, unsigned NewOpc,
853                          unsigned Reg, bool RegDeadKill, bool RegUndef,
854                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
855                          unsigned OffReg, bool OffKill, bool OffUndef,
856                          ARMCC::CondCodes Pred, unsigned PredReg,
857                          const TargetInstrInfo *TII, bool isT2) {
858  int Offset = OffImm;
859  if (!isT2) {
860    if (OffImm < 0)
861      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
862    else
863      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
864  }
865  if (isDef) {
866    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
867                                      TII->get(NewOpc))
868      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
869      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
870    if (!isT2)
871      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
872    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
873  } else {
874    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
875                                      TII->get(NewOpc))
876      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
877      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
878    if (!isT2)
879      MIB.addReg(OffReg,  getKillRegState(OffKill)|getUndefRegState(OffUndef));
880    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
881  }
882}
883
/// FixInvalidRegPairOp - LDRD / STRD require an even/odd consecutive register
/// pair. If MI is an (ARM or Thumb2) LDRD / STRD whose registers do not form
/// such a pair (checked via their Dwarf register numbers), rewrite it either
/// as an LDM / STM (ascending registers, zero offset) or as two separate
/// LDR / STR instructions, then erase the original and back MBBI up so the
/// new instructions are re-scanned. Always returns false in this revision.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // Already a legal even/odd consecutive pair: leave the instruction alone.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the transfer registers are defs (dead matters); for stores
    // they are uses (kill matters).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 LDRD / STRD have no offset register operand.
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      // Choose the t2 imm8 form when the offset is negative (t2LDRi12 /
      // t2STRi12 only encode non-negative offsets).
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        // Emit the high half first so EvenReg's load comes last and the
        // base / offset uses it may clobber stay valid.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is probably
          // on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Normal order: low half first, base / offset killed only on the
        // second (last) use.
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Step back so the caller re-scans the instructions just inserted.
    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}
993
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops. Scans
/// the block once, accumulating a chain of same-opcode / same-base /
/// same-predicate memory ops in MemOps, and attempts a merge whenever the
/// chain is broken (different op, base clobber, or end of block).
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;        // Number of entries currently in MemOps.
  MemOpQueue MemOps;             // Current chain, kept sorted by offset.
  unsigned CurrBase = 0;         // Base register of current chain (0 = none).
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;         // Running index of MBBI within the block.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            // Insert in offset order. NOTE: this E shadows the block-end
            // iterator declared above.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state before scanning for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1146
1147namespace {
1148  struct OffsetCompare {
1149    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1150      int LOffset = getMemoryOpOffset(LHS);
1151      int ROffset = getMemoryOpOffset(RHS);
1152      assert(LHS == RHS || LOffset != ROffset);
1153      return LOffset > ROffset;
1154    }
1155  };
1156}
1157
1158/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
1159/// (bx lr) into the preceeding stack restore so it directly restore the value
1160/// of LR into pc.
1161///   ldmfd sp!, {r7, lr}
1162///   bx lr
1163/// =>
1164///   ldmfd sp!, {r7, pc}
1165bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1166  if (MBB.empty()) return false;
1167
1168  MachineBasicBlock::iterator MBBI = prior(MBB.end());
1169  if (MBBI != MBB.begin() &&
1170      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
1171    MachineInstr *PrevMI = prior(MBBI);
1172    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
1173        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
1174      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1175      if (MO.getReg() != ARM::LR)
1176        return false;
1177      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
1178      PrevMI->setDesc(TII->get(NewOpc));
1179      MO.setReg(ARM::PC);
1180      MBB.erase(MBBI);
1181      return true;
1182    }
1183  }
1184  return false;
1185}
1186
1187bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1188  const TargetMachine &TM = Fn.getTarget();
1189  AFI = Fn.getInfo<ARMFunctionInfo>();
1190  TII = TM.getInstrInfo();
1191  TRI = TM.getRegisterInfo();
1192  RS = new RegScavenger();
1193  isThumb2 = AFI->isThumb2Function();
1194
1195  bool Modified = false;
1196  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1197       ++MFI) {
1198    MachineBasicBlock &MBB = *MFI;
1199    Modified |= LoadStoreMultipleOpti(MBB);
1200    Modified |= MergeReturnIntoLDM(MBB);
1201  }
1202
1203  delete RS;
1204  return Modified;
1205}
1206
1207
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.
1211
namespace {
  /// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that reschedules
  /// loads / stores off the same base register so consecutive accesses end
  /// up adjacent, making them easier to combine later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;          // For preferred i64 alignment queries.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;       // For feature checks (v5TE / v6 ops).
    MachineRegisterInfo *MRI;      // For register-pair allocation hints.
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether Op0 / Op1 can be combined into one
    // LDRD / STRD; on success fills in the new opcode and operand values.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move loads / stores of the same base close together,
    // forming LDRD / STRD pairs where possible.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver for the above.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1245
1246bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1247  TD  = Fn.getTarget().getTargetData();
1248  TII = Fn.getTarget().getInstrInfo();
1249  TRI = Fn.getTarget().getRegisterInfo();
1250  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1251  MRI = &Fn.getRegInfo();
1252  MF  = &Fn;
1253
1254  bool Modified = false;
1255  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1256       ++MFI)
1257    Modified |= RescheduleLoadStoreInstrs(MFI);
1258
1259  return Modified;
1260}
1261
1262static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1263                                      MachineBasicBlock::iterator I,
1264                                      MachineBasicBlock::iterator E,
1265                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1266                                      SmallSet<unsigned, 4> &MemRegs,
1267                                      const TargetRegisterInfo *TRI) {
1268  // Are there stores / loads / calls between them?
1269  // FIXME: This is overly conservative. We should make use of alias information
1270  // some day.
1271  SmallSet<unsigned, 4> AddedRegPressure;
1272  while (++I != E) {
1273    if (MemOps.count(&*I))
1274      continue;
1275    const TargetInstrDesc &TID = I->getDesc();
1276    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1277      return false;
1278    if (isLd && TID.mayStore())
1279      return false;
1280    if (!isLd) {
1281      if (TID.mayLoad())
1282        return false;
1283      // It's not safe to move the first 'str' down.
1284      // str r1, [r0]
1285      // strh r5, [r0]
1286      // str r4, [r0, #+4]
1287      if (TID.mayStore())
1288        return false;
1289    }
1290    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1291      MachineOperand &MO = I->getOperand(j);
1292      if (!MO.isReg())
1293        continue;
1294      unsigned Reg = MO.getReg();
1295      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1296        return false;
1297      if (Reg != Base && !MemRegs.count(Reg))
1298        AddedRegPressure.insert(Reg);
1299    }
1300  }
1301
1302  // Estimate register pressure increase due to the transformation.
1303  if (MemRegs.size() <= 4)
1304    // Ok if we are moving small number of instructions.
1305    return true;
1306  return AddedRegPressure.size() <= MemRegs.size() * 2;
1307}
1308
/// CanFormLdStDWord - Check whether two loads (or two stores) Op0 and Op1
/// can be combined into a single LDRD / STRD. On success fills in the new
/// opcode, the even/odd transfer registers, base / offset registers, the
/// encoded offset, the predicate, and whether the Thumb2 form is used, then
/// returns true.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
      return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement. Requires a single, non-volatile memoperand with a value.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    // ARM mode: re-encode sign + magnitude as an addrmode3 immediate.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  // LDRD / STRD need two distinct transfer registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1394
/// RescheduleOps - Ops is a list of loads (or stores, per isLd) off the same
/// base register. Find runs of ops with consecutive offsets and move them
/// next to each other so later passes can merge them; runs of exactly two
/// that qualify are combined into LDRD / STRD directly. MI2LocMap maps each
/// instruction to its position in the block. Returns true if anything moved.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by decreasing offset, so iterate backwards to visit
    // increasing offsets while tracking the first/last block positions.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      // Stop at a gap: offsets must advance by exactly the transfer size.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Give up on this run; drop its entries from the worklist.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads move up to
        // the first op's position; stores move down to the last's. Skip past
        // ops that are themselves being moved.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // Simply splice the run of loads / stores to the insertion point,
          // preserving their sorted order.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
1532
/// RescheduleLoadStoreInstrs - Collect loads and stores per base register
/// within each call / terminator-free region of MBB, then run RescheduleOps
/// on every base that has more than one access. Returns true if anything
/// was changed.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;   // Instruction -> position.
  // Per-region collections of loads / stores keyed by base register, plus
  // the list of bases in the order they were first seen.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    // Scan one region: up to the next barrier or a duplicate base+offset.
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      // Only unconditional memory ops are collected.
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access at the same base+offset ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // A second access at the same base+offset ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start fresh collections for the next region.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
1641
1642
1643/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1644/// optimization pass.
1645FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1646  if (PreAlloc)
1647    return new ARMPreAllocLoadStoreOpt();
1648  return new ARMLoadStoreOpt();
1649}
1650