ARMLoadStoreOptimizer.cpp revision 974fe5d69187bdf33b0e111ff72e965431df4191
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that will be loaded /
/// stored. It returns true if the transformation is done.
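///
/// For example, with Base = r0, Offset = 0, Opcode = ARM::LDR and
/// Regs = {r1, r2, r3}, the instruction built is:
///   ldmia r0, {r1, r2, r3}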
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

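  // The starting offset selects the addressing sub-mode. For AM4 with, e.g.,
  // NumRegs = 3: 0 -> ia, +4 -> ib, -8 -> da, -12 -> db.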
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
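///
/// E.g. four loads from [r0], [r0, #4], [r0, #8] and [r0, #12] into r4-r7
/// become a single "ldmia r0, {r4, r5, r6, r7}".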
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
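    // e.g. AM4 permits "ldm r0, {r1, r3}", while the AM5 (VFP) forms require
    // a consecutive run such as {s1, s2, s3}.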
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}

/// getInstrPredicate - If the instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}

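/// isMatchingDecrement - Return true if MI is a "SUBri Base, Base, #Bytes"
/// under the given predicate.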
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::SUBri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

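/// isMatchingIncrement - Return true if MI is an "ADDri Base, Base, #Bytes"
/// under the given predicate.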
static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg) {
  unsigned MyPredReg = 0;
  return (MI && MI->getOpcode() == ARM::ADDri &&
          MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

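/// getLSMultipleTransferSize - Return the number of bytes transferred by the
/// given load / store (multiple) instruction, or 0 if it is not one this pass
/// understands.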
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// mergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      bool &Advance,
                                      MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

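/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of the given
/// opcode. VFP loads / stores have no separate pre-indexed form; a writeback
/// FLDM / FSTM of a single register is used instead.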
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
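///
/// ldr r1, [r0]
/// r0 := r0 + 4
/// =>
/// ldr r1, [r0], #4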
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if the instruction is a memory operation that
/// this pass is capable of operating on.
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

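/// getMemoryOpOffset - Return the signed byte offset encoded in the memory
/// instruction's addressing-mode immediate operand.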
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

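/// InsertLDR_STR - Emit a single LDR / STR with an AM2 immediate offset.
/// Used when splitting an invalid LDRD / STRD into two instructions.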
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg,  getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg,  getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}

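/// FixInvalidRegPairOp - LDRD / STRD require an even / odd register pair. If
/// the pair is no longer valid after allocation, turn the instruction into an
/// LDM / STM or into two separate LDR / STR instructions.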
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to
      // an ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
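  /// OffsetCompare - Order memory ops by decreasing offset; RescheduleOps
  /// pops them off the back of the vector, i.e. visits them in increasing
  /// offset order.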
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, unsigned &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

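/// IsSafeToMove - Return true if the instructions between I and E (the ops
/// being moved excepted) neither redefine Base nor contain memory accesses,
/// calls or other side effects that would make the motion unsafe.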
static bool IsSafeToMove(bool isLd, unsigned Base,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock::iterator E,
                         SmallPtrSet<MachineInstr*, 4> &MoveOps,
                         const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  while (++I != E) {
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore() && !MoveOps.count(&*I))
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
        return false;
    }
  }
  return true;
}

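/// CanFormLdStDWord - Return true if the pair of LDRs / STRs Op0 and Op1 can
/// be combined into a single LDRD / STRD, and compute the operands for the
/// new instruction.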
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, unsigned &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred) {
  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !Op0->memoperands_begin()->getValue() ||
      Op0->memoperands_begin()->isVolatile())
    return false;

  unsigned Align = Op0->memoperands_begin()->getAlignment();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  if (OffImm < 0) {
    AddSub = ARM_AM::sub;
    OffImm = - OffImm;
  }
  if (OffImm >= 256) // 8 bits
    return false;
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg = Op0->getOperand(2).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

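/// RescheduleOps - Move loads / stores of the same base register next to each
/// other (and, for a suitable pair, try to turn them into an LDRD / STRD) so
/// the post-allocation pass has a better chance of merging them.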
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 4)
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MoveOps;
      for (int i = NumMove-1; i >= 0; --i)
        MoveOps.insert(Ops[i]);

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
      if (DoMove)
        DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        unsigned Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

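/// RescheduleLoadStoreInstrs - Scan the basic block up to the next barrier
/// (call or terminator), group the loads / stores by base register, and try
/// to reschedule each group.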
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opcode = MI->getOpcode();
      bool isLd = Opcode == ARM::LDR ||
        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}