ARMLoadStoreOptimizer.cpp revision 08c66642d70879cc479b502c048df4f5fdeaefae
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

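/// getLoadStoreMultipleOpcode - Map a single load / store opcode and an
/// addressing submode to the corresponding load / store multiple opcode.
/// Returns 0 when no such opcode exists, e.g. VLDRS with the db submode,
/// where only the base-updating VLDMSDB_UPD form is defined.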
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  return 0;
}

namespace llvm {
  namespace ARM_AM {

AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert an LDM or STM with Base as the base register
/// and the registers in Regs as the register operands that would be loaded /
/// stored. It returns true if the transformation is done.
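/// For example, with Base == r4 and a starting Offset of 0:
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   ldr r2, [r4, #8]
/// =>
///   ldmia r4, {r0, r1, r2}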
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If the starting offset isn't zero, insert an instruction to materialize
    // a new base. But only do so if it is cost effective, i.e. merging more
    // than two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = -Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - Call MergeOps and update MemOps and Merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
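/// The ops in MemOps must already be sorted by offset; registers must appear
/// in ascending register number order (and, for VFP, be consecutive). The
/// first op that breaks the run ends the current merge, and merging then
/// recurses on the remainder of the queue.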
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limits are 32 for S variants, 16 for D variants.
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VLDRD:
  case ARM::VSTRD:
    Limit = 16;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this one in. Try merging the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
}

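/// isMatchingDecrement - Return true if MI subtracts exactly Bytes from Base
/// under the given predicate, e.g. "sub r4, r4, #8" for Base == r4 and
/// Bytes == 8. Note that tSUBspi immediates are scaled by 4.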
static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

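/// getUpdatingLSMultipleOpcode - Return the base-updating (writeback) form of
/// a load / store multiple opcode for the given submode, e.g. LDMIA with
/// ARM_AM::db yields LDMDB_UPD.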
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of the operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

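/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of a load /
/// store opcode. VLDR / VSTR have no pre-indexed forms, so the updating
/// single-register VLDM / VSTM opcodes are used for them instead.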
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
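///
/// ldr r1, [r0, #0]
/// add r0, r0, #4
/// =>
/// ldr r1, [r0], #4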
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, Thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
  // Unless the instruction has exactly one memory operand, conservatively
  // assume the access is unaligned, volatile, and unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

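/// getMemoryOpOffset - Return the byte offset encoded in a load / store's
/// immediate operand. i12 and t2 i8 / i12 offsets are already byte offsets;
/// AM3 (LDRD / STRD) and AM5 (VLDR / VSTR) encodings carry a separate
/// add / sub flag, and AM5 immediates are in words, so they are scaled by 4.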
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to an
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
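/// Candidate ops are collected into MemOps, kept sorted by offset, for as
/// long as they share the same base register, opcode, and predicate; once the
/// chain is broken, the queue is flushed through MergeLDR_STR.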
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block; try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
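///
/// For example, it tries to turn
///   ldr r0, [r4]
///   add r7, r7, #1
///   ldr r1, [r4, #4]
/// into
///   ldr r0, [r4]
///   ldr r1, [r4, #4]
///   add r7, r7, #1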

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

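/// IsSafeAndProfitableToMove - Check that no instruction between I and E
/// (other than the memory ops being rescheduled) is a call, a terminator, an
/// aliasing load / store, or a redefinition of Base, and estimate whether the
/// longer live ranges add an acceptable amount of register pressure.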
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

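/// CanFormLdStDWord - Return true if the two memory ops Op0 and Op1 can be
/// combined into a single ldrd / strd, and compute the new opcode, the
/// destination / source registers, the base register, the encoded offset,
/// and the predicate for the paired instruction.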
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
1571  if (isT2) {
1572    int Limit = (1 << 8) * Scale;
1573    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1574      return false;
1575    Offset = OffImm;
1576  } else {
1577    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1578    if (OffImm < 0) {
1579      AddSub = ARM_AM::sub;
1580      OffImm = - OffImm;
1581    }
1582    int Limit = (1 << 8) * Scale;
1583    if (OffImm >= Limit || (OffImm & (Scale-1)))
1584      return false;
1585    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1586  }
1587  EvenReg = Op0->getOperand(0).getReg();
1588  OddReg  = Op1->getOperand(0).getReg();
1589  if (EvenReg == OddReg)
1590    return false;
1591  BaseReg = Op0->getOperand(1).getReg();
1592  Pred = llvm::getInstrPredicate(Op0, PredReg);
1593  dl = Op0->getDebugLoc();
1594  return true;
1595}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

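/// RescheduleOps - Given the loads / stores in Ops, which all use the same
/// base register, move runs of same-size ops with consecutive offsets next
/// to each other, forming an ldrd / strd when exactly two ops qualify.
/// Returns true if anything was moved or combined.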
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
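  // For example (illustrative): given ldr r1, [r0]; ldr r2, [r0, #4];
  // ldr r3, [r0, #12], the scan below stops before the third load because
  // of the 4-byte gap at offset #8.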
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form a register
        // pair.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
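          // (In ARM mode, ldrd / strd require an even / odd register pair
          // such as r0/r1; the hints steer the allocator toward a pair
          // that can actually be encoded.)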
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

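/// RescheduleLoadStoreInstrs - Scan a basic block, grouping loads / stores
/// by base register until a scheduling barrier (call / terminator) or a
/// repeated base+offset combination is seen, then invoke RescheduleOps on
/// each group with more than one memory op.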
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - Returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}