ARMLoadStoreOptimizer.cpp revision 2567eec4233d58a2a0cbdcafca9420452689b395
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMAddressingModes.h"
18#include "ARMBaseInstrInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMRegisterInfo.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/Target/TargetData.h"
30#include "llvm/Target/TargetInstrInfo.h"
31#include "llvm/Target/TargetMachine.h"
32#include "llvm/Target/TargetRegisterInfo.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/ADT/DenseMap.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/SmallPtrSet.h"
37#include "llvm/ADT/SmallSet.h"
38#include "llvm/ADT/SmallVector.h"
39#include "llvm/ADT/Statistic.h"
40using namespace llvm;
41
42STATISTIC(NumLDMGened , "Number of ldm instructions generated");
43STATISTIC(NumSTMGened , "Number of stm instructions generated");
44STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
45STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
46STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
47STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
48STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
49STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
50STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
51STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
52STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
53
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
56
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Per-function state; presumably initialized in runOnMachineFunction
    // (its definition is not visible in this chunk).
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store recorded while scanning a
    /// basic block for mergeable memory operations.
    struct MemOpQueueEntry {
      int Offset;                        // Immediate offset of the memory op.
      unsigned Reg;                      // Register loaded or stored.
      bool isKill;                       // Register operand carried a kill flag.
      unsigned Position;                 // Ordinal position within the block.
      MachineBasicBlock::iterator MBBI;  // The load / store instruction itself.
      bool Merged;                       // Set once folded into a multiple op.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Build and insert a single LDM/STM/VLDM/VSTM covering the regs in Regs.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Call MergeOps on MemOps[memOpsBegin, memOpsEnd) and, on success, update
    // the bookkeeping (kill flags, Merged bits, Merges list).
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Greedily merge runs of compatible ops starting at MemOps[SIndex].
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    // Fold a neighboring base-register inc/dec into a single load / store.
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    // Fold a neighboring base-register inc/dec into a load / store multiple.
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
130
/// getLoadStoreMultipleOpcode - Map a single load / store opcode plus an
/// addressing submode to the corresponding load / store multiple opcode
/// (e.g. LDRi12 + ia -> LDMIA), bumping the matching statistics counter.
/// Note that the Thumb2 and VFP forms only support the ia and db submodes.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return ARM::VLDMSDB;
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return ARM::VSTMSDB;
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return ARM::VLDMDDB;
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return ARM::VSTMDDB;
    }
    break;
  }

  // Not reached: every case above either returns or is llvm_unreachable.
  return 0;
}
208
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia, ib, da,
/// or db) encoded in a load / store multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::STMIA:
  case ARM::t2LDMIA:
  case ARM::t2STMIA:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::STMDA:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::STMDB:
  case ARM::t2LDMDB:
  case ARM::t2STMDB:
  case ARM::VLDMSDB:
  case ARM::VSTMSDB:
  case ARM::VLDMDDB:
  case ARM::VSTMDDB:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::STMIB:
    return ARM_AM::ib;
  }

  // Not reached: every case above either returns or is llvm_unreachable.
  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm
249
250static bool isT2i32Load(unsigned Opc) {
251  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
252}
253
254static bool isi32Load(unsigned Opc) {
255  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
256}
257
258static bool isT2i32Store(unsigned Opc) {
259  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
260}
261
262static bool isi32Store(unsigned Opc) {
263  return Opc == ARM::STRi12 || isT2i32Store(Opc);
264}
265
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick an addressing submode from the starting offset of the run.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // For a store, use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    // The add / sub immediate must be encodable as a (T2) SO immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  // Loads define their register operands; stores only read (and may kill)
  // them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
344
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.  memOps[memOpsBegin, memOpsEnd) are the ops being merged; the new
// instruction is inserted right after memOps[insertAfter].
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;

  // Scan the ops outside the merge range that sit before the insertion point:
  // a register killed by an already-merged op cannot have its kill flag
  // updated (UnavailRegs => abort below), while a kill on an unmerged op can
  // be transferred onto the merged instruction (KilledRegs / Killer).
  SmallSet<unsigned, 4> UnavailRegs;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0; i < memOpsBegin; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      if (memOps[i].Merged)
        UnavailRegs.insert(Reg);
      else {
        KilledRegs.insert(Reg);
        Killer[Reg] = i;
      }
    }
  }
  // Ops after the merge range may also appear before the insertion point.
  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  // Build the (register, isKill) list for the merged instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    if (UnavailRegs.count(Reg))
      // Register is killed before and it's not easy / possible to update the
      // kill marker on already merged instructions. Abort.
      return;

    // If we are inserting the merged operation after an unmerged operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        memOps[j].MBBI->getOperand(0).setIsKill(false);
        memOps[j].isKill = false;
      }
    }
    // Erase the original single load / store and mark the entry as merged.
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}
420
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.  Scans MemOps starting at SIndex for a
/// maximal run with consecutive offsets and ascending register numbers, merges
/// it, and recurses on the remainder.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;              // Starting offset of the current run.
  unsigned insertAfter = SIndex;     // Latest-positioned op seen in the run.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order.  For VFP, the registers
    // must also be consecutive and there is a limit of 16 double-word
    // registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum)
         || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Then restart the scan from the op that broke the run.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole remaining queue formed one run; merge it.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
475
476static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
477                                       unsigned Bytes, unsigned Limit,
478                                       ARMCC::CondCodes Pred, unsigned PredReg){
479  unsigned MyPredReg = 0;
480  if (!MI)
481    return false;
482  if (MI->getOpcode() != ARM::t2SUBri &&
483      MI->getOpcode() != ARM::t2SUBrSPi &&
484      MI->getOpcode() != ARM::t2SUBrSPi12 &&
485      MI->getOpcode() != ARM::tSUBspi &&
486      MI->getOpcode() != ARM::SUBri)
487    return false;
488
489  // Make sure the offset fits in 8 bits.
490  if (Bytes == 0 || (Limit && Bytes >= Limit))
491    return false;
492
493  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
494  return (MI->getOperand(0).getReg() == Base &&
495          MI->getOperand(1).getReg() == Base &&
496          (MI->getOperand(2).getImm()*Scale) == Bytes &&
497          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
498          MyPredReg == PredReg);
499}
500
501static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
502                                       unsigned Bytes, unsigned Limit,
503                                       ARMCC::CondCodes Pred, unsigned PredReg){
504  unsigned MyPredReg = 0;
505  if (!MI)
506    return false;
507  if (MI->getOpcode() != ARM::t2ADDri &&
508      MI->getOpcode() != ARM::t2ADDrSPi &&
509      MI->getOpcode() != ARM::t2ADDrSPi12 &&
510      MI->getOpcode() != ARM::tADDspi &&
511      MI->getOpcode() != ARM::ADDri)
512    return false;
513
514  if (Bytes == 0 || (Limit && Bytes >= Limit))
515    // Make sure the offset fits in 8 bits.
516    return false;
517
518  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
519  return (MI->getOperand(0).getReg() == Base &&
520          MI->getOperand(1).getReg() == Base &&
521          (MI->getOperand(2).getImm()*Scale) == Bytes &&
522          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
523          MyPredReg == PredReg);
524}
525
/// getLSMultipleTransferSize - Return the number of bytes transferred by MI:
/// 4 or 8 for single loads / stores, or the per-register size times the
/// length of the register list for load / store multiples.  Returns 0 for
/// opcodes this pass does not handle.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    // Register-list length is the variadic operand count beyond the fixed
    // operands in the descriptor; presumably the first list register is
    // counted among the fixed operands, hence the +1 — TODO confirm.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
565
/// getUpdatingLSMultipleOpcode - Map a non-writeback load / store multiple
/// opcode plus an addressing submode to its base-updating (writeback)
/// counterpart (e.g. LDMIA + ia -> LDMIA_UPD).
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
  case ARM::VLDMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
  case ARM::VLDMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
  case ARM::VSTMSDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
  case ARM::VSTMDDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  // Not reached: every case above either returns or is llvm_unreachable.
  return 0;
}
646
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// Returns true if the merge was done.  Advance/I are adjusted if the erased
/// add/sub happens to be the caller's scan iterator.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.  A preceding decrement can be
  // absorbed by switching to the "before" form of the submode.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.  A following inc/dec in the same
  // direction as the submode becomes the writeback.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we're about to erase the caller's iterator, advance it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild as the updating form, with the written-back base as a def.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
741
742static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
743                                             ARM_AM::AddrOpc Mode) {
744  switch (Opc) {
745  case ARM::LDRi12:
746    return ARM::LDR_PRE;
747  case ARM::STRi12:
748    return ARM::STR_PRE;
749  case ARM::VLDRS:
750    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
751  case ARM::VLDRD:
752    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
753  case ARM::VSTRS:
754    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
755  case ARM::VSTRD:
756    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
757  case ARM::t2LDRi8:
758  case ARM::t2LDRi12:
759    return ARM::t2LDR_PRE;
760  case ARM::t2STRi8:
761  case ARM::t2STRi12:
762    return ARM::t2STR_PRE;
763  default: llvm_unreachable("Unhandled opcode!");
764  }
765  return 0;
766}
767
768static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
769                                              ARM_AM::AddrOpc Mode) {
770  switch (Opc) {
771  case ARM::LDRi12:
772    return ARM::LDR_POST;
773  case ARM::STRi12:
774    return ARM::STR_POST;
775  case ARM::VLDRS:
776    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
777  case ARM::VLDRD:
778    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
779  case ARM::VSTRS:
780    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
781  case ARM::VSTRD:
782    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
783  case ARM::t2LDRi8:
784  case ARM::t2LDRi12:
785    return ARM::t2LDR_POST;
786  case ARM::t2STRi8:
787  case ARM::t2STRi12:
788    return ARM::t2STR_POST;
789  default: llvm_unreachable("Unhandled opcode!");
790  }
791  return 0;
792}
793
794/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
795/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
796bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
797                                               MachineBasicBlock::iterator MBBI,
798                                               const TargetInstrInfo *TII,
799                                               bool &Advance,
800                                               MachineBasicBlock::iterator &I) {
801  MachineInstr *MI = MBBI;
802  unsigned Base = MI->getOperand(1).getReg();
803  bool BaseKill = MI->getOperand(1).isKill();
804  unsigned Bytes = getLSMultipleTransferSize(MI);
805  int Opcode = MI->getOpcode();
806  DebugLoc dl = MI->getDebugLoc();
807  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
808                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
809  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
810  if (isi32Load(Opcode) || isi32Store(Opcode))
811    if (MI->getOperand(2).getImm() != 0)
812      return false;
813  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
814    return false;
815
816  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
817  // Can't do the merge if the destination register is the same as the would-be
818  // writeback register.
819  if (isLd && MI->getOperand(0).getReg() == Base)
820    return false;
821
822  unsigned PredReg = 0;
823  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
824  bool DoMerge = false;
825  ARM_AM::AddrOpc AddSub = ARM_AM::add;
826  unsigned NewOpc = 0;
827  // AM2 - 12 bits, thumb2 - 8 bits.
828  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
829
830  // Try merging with the previous instruction.
831  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
832  if (MBBI != BeginMBBI) {
833    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
834    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
835      --PrevMBBI;
836    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
837      DoMerge = true;
838      AddSub = ARM_AM::sub;
839    } else if (!isAM5 &&
840               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
841      DoMerge = true;
842    }
843    if (DoMerge) {
844      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
845      MBB.erase(PrevMBBI);
846    }
847  }
848
849  // Try merging with the next instruction.
850  MachineBasicBlock::iterator EndMBBI = MBB.end();
851  if (!DoMerge && MBBI != EndMBBI) {
852    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
853    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
854      ++NextMBBI;
855    if (!isAM5 &&
856        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
857      DoMerge = true;
858      AddSub = ARM_AM::sub;
859    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
860      DoMerge = true;
861    }
862    if (DoMerge) {
863      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
864      if (NextMBBI == I) {
865        Advance = true;
866        ++I;
867      }
868      MBB.erase(NextMBBI);
869    }
870  }
871
872  if (!DoMerge)
873    return false;
874
875  unsigned Offset = 0;
876  if (isAM2)
877    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
878  else if (!isAM5)
879    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
880
881  if (isAM5) {
882    // VLDM[SD}_UPD, VSTM[SD]_UPD
883    // (There are no base-updating versions of VLDR/VSTR instructions, but the
884    // updating load/store-multiple instructions can be used with only one
885    // register.)
886    MachineOperand &MO = MI->getOperand(0);
887    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
888      .addReg(Base, getDefRegState(true)) // WB base register
889      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
890      .addImm(Pred).addReg(PredReg)
891      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
892                            getKillRegState(MO.isKill())));
893  } else if (isLd) {
894    if (isAM2)
895      // LDR_PRE, LDR_POST,
896      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
897        .addReg(Base, RegState::Define)
898        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
899    else
900      // t2LDR_PRE, t2LDR_POST
901      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
902        .addReg(Base, RegState::Define)
903        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
904  } else {
905    MachineOperand &MO = MI->getOperand(0);
906    if (isAM2)
907      // STR_PRE, STR_POST
908      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
909        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
910        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
911    else
912      // t2STR_PRE, t2STR_POST
913      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
914        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
915        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
916  }
917  MBB.erase(MBBI);
918
919  return true;
920}
921
922/// isMemoryOp - Returns true if instruction is a memory operations (that this
923/// pass is capable of operating on).
924static bool isMemoryOp(const MachineInstr *MI) {
925  // When no memory operands are present, conservatively assume unaligned,
926  // volatile, unfoldable.
927  if (!MI->hasOneMemOperand())
928    return false;
929
930  const MachineMemOperand *MMO = *MI->memoperands_begin();
931
932  // Don't touch volatile memory accesses - we may be changing their order.
933  if (MMO->isVolatile())
934    return false;
935
936  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
937  // not.
938  if (MMO->getAlignment() < 4)
939    return false;
940
941  // str <undef> could probably be eliminated entirely, but for now we just want
942  // to avoid making a mess of it.
943  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
944  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
945      MI->getOperand(0).isUndef())
946    return false;
947
948  // Likewise don't mess with references to undefined addresses.
949  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
950      MI->getOperand(1).isUndef())
951    return false;
952
953  int Opcode = MI->getOpcode();
954  switch (Opcode) {
955  default: break;
956  case ARM::VLDRS:
957  case ARM::VSTRS:
958    return MI->getOperand(1).isReg();
959  case ARM::VLDRD:
960  case ARM::VSTRD:
961    return MI->getOperand(1).isReg();
962  case ARM::LDRi12:
963  case ARM::STRi12:
964  case ARM::t2LDRi8:
965  case ARM::t2LDRi12:
966  case ARM::t2STRi8:
967  case ARM::t2STRi12:
968    return MI->getOperand(1).isReg();
969  }
970  return false;
971}
972
973/// AdvanceRS - Advance register scavenger to just before the earliest memory
974/// op that is being merged.
975void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
976  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
977  unsigned Position = MemOps[0].Position;
978  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
979    if (MemOps[i].Position < Position) {
980      Position = MemOps[i].Position;
981      Loc = MemOps[i].MBBI;
982    }
983  }
984
985  if (Loc != MBB.begin())
986    RS->forward(prior(Loc));
987}
988
989static int getMemoryOpOffset(const MachineInstr *MI) {
990  int Opcode = MI->getOpcode();
991  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
992  unsigned NumOperands = MI->getDesc().getNumOperands();
993  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
994
995  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
996      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
997      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
998      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
999    return OffField;
1000
1001  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1002    : ARM_AM::getAM5Offset(OffField) * 4;
1003  if (isAM3) {
1004    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1005      Offset = -Offset;
1006  } else {
1007    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1008      Offset = -Offset;
1009  }
1010  return Offset;
1011}
1012
1013static void InsertLDR_STR(MachineBasicBlock &MBB,
1014                          MachineBasicBlock::iterator &MBBI,
1015                          int Offset, bool isDef,
1016                          DebugLoc dl, unsigned NewOpc,
1017                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1018                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1019                          bool OffKill, bool OffUndef,
1020                          ARMCC::CondCodes Pred, unsigned PredReg,
1021                          const TargetInstrInfo *TII, bool isT2) {
1022  if (isDef) {
1023    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1024                                      TII->get(NewOpc))
1025      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1026      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1027    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1028  } else {
1029    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1030                                      TII->get(NewOpc))
1031      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1032      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1033    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1034  }
1035}
1036
/// FixInvalidRegPairOp - LDRD / STRD need their two transfer registers to be
/// an (even, even+1) pair. If the pair is invalid, rewrite the instruction
/// either as one LDM / STM (when the register numbers ascend and the offset
/// is zero) or as two single loads / stores. Returns true and repoints MBBI
/// at the first replacement instruction if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // Already a legal even/odd consecutive pair -- nothing to fix.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the transfer operands are defs (dead matters); for stores
    // they are uses (kill matters).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // The t2 forms have no separate offset-register operand at index 3.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      // In that case emit the Odd half (at offset +4) first.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original dual op and leave MBBI on the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1144
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State describing the chain of mergeable memory ops currently being
  // collected: base register, opcode, transfer size and predicate must all
  // match for an op to join the chain.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Fix illegal LDRD / STRD register pairs first; if a rewrite happened,
    // re-examine the replacement instruction(s).
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue, keeping it sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Insert before the first entry with a larger offset; a matching
            // offset means two ops hit the same address and cannot merge.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain-tracking state before scanning for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1307
1308namespace {
1309  struct OffsetCompare {
1310    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1311      int LOffset = getMemoryOpOffset(LHS);
1312      int ROffset = getMemoryOpOffset(RHS);
1313      assert(LHS == RHS || LOffset != ROffset);
1314      return LOffset > ROffset;
1315    }
1316  };
1317}
1318
1319/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1320/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it
1321/// directly restore the value of LR into pc.
1322///   ldmfd sp!, {..., lr}
1323///   bx lr
1324/// or
1325///   ldmfd sp!, {..., lr}
1326///   mov pc, lr
1327/// =>
1328///   ldmfd sp!, {..., pc}
1329bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1330  if (MBB.empty()) return false;
1331
1332  MachineBasicBlock::iterator MBBI = prior(MBB.end());
1333  if (MBBI != MBB.begin() &&
1334      (MBBI->getOpcode() == ARM::BX_RET ||
1335       MBBI->getOpcode() == ARM::tBX_RET ||
1336       MBBI->getOpcode() == ARM::MOVPCLR)) {
1337    MachineInstr *PrevMI = prior(MBBI);
1338    unsigned Opcode = PrevMI->getOpcode();
1339    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1340        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1341        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1342      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1343      if (MO.getReg() != ARM::LR)
1344        return false;
1345      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1346      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1347              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1348      PrevMI->setDesc(TII->get(NewOpc));
1349      MO.setReg(ARM::PC);
1350      PrevMI->copyImplicitOps(&*MBBI);
1351      MBB.erase(MBBI);
1352      return true;
1353    }
1354  }
1355  return false;
1356}
1357
1358bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1359  const TargetMachine &TM = Fn.getTarget();
1360  AFI = Fn.getInfo<ARMFunctionInfo>();
1361  TII = TM.getInstrInfo();
1362  TRI = TM.getRegisterInfo();
1363  RS = new RegScavenger();
1364  isThumb2 = AFI->isThumb2Function();
1365
1366  bool Modified = false;
1367  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1368       ++MFI) {
1369    MachineBasicBlock &MBB = *MFI;
1370    Modified |= LoadStoreMultipleOpti(MBB);
1371    Modified |= MergeReturnIntoLDM(MBB);
1372  }
1373
1374  delete RS;
1375  return Modified;
1376}
1377
1378
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;          // Data layout (for alignment queries).
    const TargetInstrInfo *TII;    // Target instruction info.
    const TargetRegisterInfo *TRI; // Target register info.
    const ARMSubtarget *STI;       // Subtarget feature queries (e.g. v5TE).
    MachineRegisterInfo *MRI;      // Used to set register allocation hints.
    MachineFunction *MF;           // Function currently being processed.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether Op0/Op1 can be combined into a single
    // LDRD / STRD; fills in the out-parameters on success.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move same-base loads / stores closer together; may
    // also form LDRD / STRD pairs.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver for the rescheduling.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1416
1417bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1418  TD  = Fn.getTarget().getTargetData();
1419  TII = Fn.getTarget().getInstrInfo();
1420  TRI = Fn.getTarget().getRegisterInfo();
1421  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1422  MRI = &Fn.getRegInfo();
1423  MF  = &Fn;
1424
1425  bool Modified = false;
1426  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1427       ++MFI)
1428    Modified |= RescheduleLoadStoreInstrs(MFI);
1429
1430  return Modified;
1431}
1432
1433static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1434                                      MachineBasicBlock::iterator I,
1435                                      MachineBasicBlock::iterator E,
1436                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1437                                      SmallSet<unsigned, 4> &MemRegs,
1438                                      const TargetRegisterInfo *TRI) {
1439  // Are there stores / loads / calls between them?
1440  // FIXME: This is overly conservative. We should make use of alias information
1441  // some day.
1442  SmallSet<unsigned, 4> AddedRegPressure;
1443  while (++I != E) {
1444    if (I->isDebugValue() || MemOps.count(&*I))
1445      continue;
1446    const TargetInstrDesc &TID = I->getDesc();
1447    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
1448      return false;
1449    if (isLd && TID.mayStore())
1450      return false;
1451    if (!isLd) {
1452      if (TID.mayLoad())
1453        return false;
1454      // It's not safe to move the first 'str' down.
1455      // str r1, [r0]
1456      // strh r5, [r0]
1457      // str r4, [r0, #+4]
1458      if (TID.mayStore())
1459        return false;
1460    }
1461    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1462      MachineOperand &MO = I->getOperand(j);
1463      if (!MO.isReg())
1464        continue;
1465      unsigned Reg = MO.getReg();
1466      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1467        return false;
1468      if (Reg != Base && !MemRegs.count(Reg))
1469        AddedRegPressure.insert(Reg);
1470    }
1471  }
1472
1473  // Estimate register pressure increase due to the transformation.
1474  if (MemRegs.size() <= 4)
1475    // Ok if we are moving small number of instructions.
1476    return true;
1477  return AddedRegPressure.size() <= MemRegs.size() * 2;
1478}
1479
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // Pick the dual opcode matching Op0's single-op form. The t2 dual forms
  // scale their range check by 4 (see the Limit computation below).
  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // Also require a known (non-null) value and reject volatile accesses.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      // Positive offsets must fit in (1 << 8) * Scale and be Scale-aligned.
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    // ARM LDRD / STRD encode magnitude plus an add/sub bit (addrmode3).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  // Report the transfer registers, base, predicate and debug location.
  // LDRD / STRD can't use the same register for both halves.
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1557
1558static MachineMemOperand *CopyMMO(const MachineMemOperand *MMO,
1559                                  unsigned NewSize, MachineFunction *MF) {
1560  return MF->getMachineMemOperand(MachinePointerInfo(MMO->getValue(),
1561                                                     MMO->getOffset()),
1562                                  MMO->getFlags(), NewSize,
1563                                  MMO->getAlignment(), MMO->getTBAAInfo());
1564}
1565
1566bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1567                                 SmallVector<MachineInstr*, 4> &Ops,
1568                                 unsigned Base, bool isLd,
1569                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1570  bool RetVal = false;
1571
1572  // Sort by offset (in reverse order).
1573  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1574
1575  // The loads / stores of the same base are in order. Scan them from first to
1576  // last and check for the following:
1577  // 1. Any def of base.
1578  // 2. Any gaps.
1579  while (Ops.size() > 1) {
1580    unsigned FirstLoc = ~0U;
1581    unsigned LastLoc = 0;
1582    MachineInstr *FirstOp = 0;
1583    MachineInstr *LastOp = 0;
1584    int LastOffset = 0;
1585    unsigned LastOpcode = 0;
1586    unsigned LastBytes = 0;
1587    unsigned NumMove = 0;
1588    for (int i = Ops.size() - 1; i >= 0; --i) {
1589      MachineInstr *Op = Ops[i];
1590      unsigned Loc = MI2LocMap[Op];
1591      if (Loc <= FirstLoc) {
1592        FirstLoc = Loc;
1593        FirstOp = Op;
1594      }
1595      if (Loc >= LastLoc) {
1596        LastLoc = Loc;
1597        LastOp = Op;
1598      }
1599
1600      unsigned Opcode = Op->getOpcode();
1601      if (LastOpcode && Opcode != LastOpcode)
1602        break;
1603
1604      int Offset = getMemoryOpOffset(Op);
1605      unsigned Bytes = getLSMultipleTransferSize(Op);
1606      if (LastBytes) {
1607        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1608          break;
1609      }
1610      LastOffset = Offset;
1611      LastBytes = Bytes;
1612      LastOpcode = Opcode;
1613      if (++NumMove == 8) // FIXME: Tune this limit.
1614        break;
1615    }
1616
1617    if (NumMove <= 1)
1618      Ops.pop_back();
1619    else {
1620      SmallPtrSet<MachineInstr*, 4> MemOps;
1621      SmallSet<unsigned, 4> MemRegs;
1622      for (int i = NumMove-1; i >= 0; --i) {
1623        MemOps.insert(Ops[i]);
1624        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1625      }
1626
1627      // Be conservative, if the instructions are too far apart, don't
1628      // move them. We want to limit the increase of register pressure.
1629      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1630      if (DoMove)
1631        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1632                                           MemOps, MemRegs, TRI);
1633      if (!DoMove) {
1634        for (unsigned i = 0; i != NumMove; ++i)
1635          Ops.pop_back();
1636      } else {
1637        // This is the new location for the loads / stores.
1638        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1639        while (InsertPos != MBB->end()
1640               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1641          ++InsertPos;
1642
1643        // If we are moving a pair of loads / stores, see if it makes sense
1644        // to try to allocate a pair of registers that can form register pairs.
1645        MachineInstr *Op0 = Ops.back();
1646        MachineInstr *Op1 = Ops[Ops.size()-2];
1647        unsigned EvenReg = 0, OddReg = 0;
1648        unsigned BaseReg = 0, PredReg = 0;
1649        ARMCC::CondCodes Pred = ARMCC::AL;
1650        bool isT2 = false;
1651        unsigned NewOpc = 0;
1652        int Offset = 0;
1653        DebugLoc dl;
1654        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1655                                             EvenReg, OddReg, BaseReg,
1656                                             Offset, PredReg, Pred, isT2)) {
1657          Ops.pop_back();
1658          Ops.pop_back();
1659
1660          // Form the pair instruction.
1661          if (isLd) {
1662            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1663                                              dl, TII->get(NewOpc))
1664              .addReg(EvenReg, RegState::Define)
1665              .addReg(OddReg, RegState::Define)
1666              .addReg(BaseReg);
1667            // FIXME: We're converting from LDRi12 to an insn that still
1668            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
1670            if (!isT2)
1671              MIB.addReg(0);
1672            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1673
            // Copy memoperands but change size to 8.
1675            for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
1676                 mmo != Op0->memoperands_end(); ++mmo)
1677              MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
1678            ++NumLDRDFormed;
1679          } else {
1680            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
1681                                              dl, TII->get(NewOpc))
1682              .addReg(EvenReg)
1683              .addReg(OddReg)
1684              .addReg(BaseReg);
1685            // FIXME: We're converting from LDRi12 to an insn that still
1686            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
1688            if (!isT2)
1689              MIB.addReg(0);
1690            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            // Copy memoperands but change size to 8.
1692            for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
1693                 mmo != Op0->memoperands_end(); ++mmo)
1694              MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
1695            ++NumSTRDFormed;
1696          }
1697          MBB->erase(Op0);
1698          MBB->erase(Op1);
1699
1700          // Add register allocation hints to form register pairs.
1701          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1702          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1703        } else {
1704          for (unsigned i = 0; i != NumMove; ++i) {
1705            MachineInstr *Op = Ops.back();
1706            Ops.pop_back();
1707            MBB->splice(InsertPos, MBB, Op);
1708          }
1709        }
1710
1711        NumLdStMoved += NumMove;
1712        RetVal = true;
1713      }
1714    }
1715  }
1716
1717  return RetVal;
1718}
1719
1720bool
1721ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1722  bool RetVal = false;
1723
1724  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1725  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1726  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1727  SmallVector<unsigned, 4> LdBases;
1728  SmallVector<unsigned, 4> StBases;
1729
1730  unsigned Loc = 0;
1731  MachineBasicBlock::iterator MBBI = MBB->begin();
1732  MachineBasicBlock::iterator E = MBB->end();
1733  while (MBBI != E) {
1734    for (; MBBI != E; ++MBBI) {
1735      MachineInstr *MI = MBBI;
1736      const TargetInstrDesc &TID = MI->getDesc();
1737      if (TID.isCall() || TID.isTerminator()) {
1738        // Stop at barriers.
1739        ++MBBI;
1740        break;
1741      }
1742
1743      if (!MI->isDebugValue())
1744        MI2LocMap[MI] = ++Loc;
1745
1746      if (!isMemoryOp(MI))
1747        continue;
1748      unsigned PredReg = 0;
1749      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1750        continue;
1751
1752      int Opc = MI->getOpcode();
1753      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1754      unsigned Base = MI->getOperand(1).getReg();
1755      int Offset = getMemoryOpOffset(MI);
1756
1757      bool StopHere = false;
1758      if (isLd) {
1759        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1760          Base2LdsMap.find(Base);
1761        if (BI != Base2LdsMap.end()) {
1762          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1763            if (Offset == getMemoryOpOffset(BI->second[i])) {
1764              StopHere = true;
1765              break;
1766            }
1767          }
1768          if (!StopHere)
1769            BI->second.push_back(MI);
1770        } else {
1771          SmallVector<MachineInstr*, 4> MIs;
1772          MIs.push_back(MI);
1773          Base2LdsMap[Base] = MIs;
1774          LdBases.push_back(Base);
1775        }
1776      } else {
1777        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1778          Base2StsMap.find(Base);
1779        if (BI != Base2StsMap.end()) {
1780          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1781            if (Offset == getMemoryOpOffset(BI->second[i])) {
1782              StopHere = true;
1783              break;
1784            }
1785          }
1786          if (!StopHere)
1787            BI->second.push_back(MI);
1788        } else {
1789          SmallVector<MachineInstr*, 4> MIs;
1790          MIs.push_back(MI);
1791          Base2StsMap[Base] = MIs;
1792          StBases.push_back(Base);
1793        }
1794      }
1795
1796      if (StopHere) {
1797        // Found a duplicate (a base+offset combination that's seen earlier).
1798        // Backtrack.
1799        --Loc;
1800        break;
1801      }
1802    }
1803
1804    // Re-schedule loads.
1805    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1806      unsigned Base = LdBases[i];
1807      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1808      if (Lds.size() > 1)
1809        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1810    }
1811
1812    // Re-schedule stores.
1813    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1814      unsigned Base = StBases[i];
1815      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1816      if (Sts.size() > 1)
1817        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1818    }
1819
1820    if (MBBI != E) {
1821      Base2LdsMap.clear();
1822      Base2StsMap.clear();
1823      LdBases.clear();
1824      StBases.clear();
1825    }
1826  }
1827
1828  return RetVal;
1829}
1830
1831
1832/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1833/// optimization pass.
1834FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1835  if (PreAlloc)
1836    return new ARMPreAllocLoadStoreOpt();
1837  return new ARMLoadStoreOpt();
1838}
1839