ARMLoadStoreOptimizer.cpp revision acb274baacbc104d7ef3b826b443847f11548760
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMMachineFunctionInfo.h"
19#include "ARMRegisterInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/Target/TargetData.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Target/TargetRegisterInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/ADT/DenseMap.h"
36#include "llvm/ADT/STLExtras.h"
37#include "llvm/ADT/SmallPtrSet.h"
38#include "llvm/ADT/SmallSet.h"
39#include "llvm/ADT/SmallVector.h"
40#include "llvm/ADT/Statistic.h"
41using namespace llvm;
42
43STATISTIC(NumLDMGened , "Number of ldm instructions generated");
44STATISTIC(NumSTMGened , "Number of stm instructions generated");
45STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
46STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
47STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
48STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
49STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
50STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
51STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
52STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
53STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
54
55/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
56/// load / store instructions to form ldm / stm instructions.
57
58namespace {
59  struct ARMLoadStoreOpt : public MachineFunctionPass {
60    static char ID;
61    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
62
63    const TargetInstrInfo *TII;
64    const TargetRegisterInfo *TRI;
65    ARMFunctionInfo *AFI;
66    RegScavenger *RS;
67    bool isThumb2;
68
69    virtual bool runOnMachineFunction(MachineFunction &Fn);
70
71    virtual const char *getPassName() const {
72      return "ARM load / store optimization pass";
73    }
74
75  private:
76    struct MemOpQueueEntry {
77      int Offset;
78      unsigned Reg;
79      bool isKill;
80      unsigned Position;
81      MachineBasicBlock::iterator MBBI;
82      bool Merged;
83      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
84                      MachineBasicBlock::iterator i)
85        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
86    };
87    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
88    typedef MemOpQueue::iterator MemOpQueueIter;
89
90    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
91                  int Offset, unsigned Base, bool BaseKill, int Opcode,
92                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
93                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
94    void MergeOpsUpdate(MachineBasicBlock &MBB,
95                        MemOpQueue &MemOps,
96                        unsigned memOpsBegin,
97                        unsigned memOpsEnd,
98                        unsigned insertAfter,
99                        int Offset,
100                        unsigned Base,
101                        bool BaseKill,
102                        int Opcode,
103                        ARMCC::CondCodes Pred,
104                        unsigned PredReg,
105                        unsigned Scratch,
106                        DebugLoc dl,
107                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
108    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
109                      int Opcode, unsigned Size,
110                      ARMCC::CondCodes Pred, unsigned PredReg,
111                      unsigned Scratch, MemOpQueue &MemOps,
112                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113
114    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
115    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
116                             MachineBasicBlock::iterator &MBBI);
117    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
118                                  MachineBasicBlock::iterator MBBI,
119                                  const TargetInstrInfo *TII,
120                                  bool &Advance,
121                                  MachineBasicBlock::iterator &I);
122    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
123                                   MachineBasicBlock::iterator MBBI,
124                                   bool &Advance,
125                                   MachineBasicBlock::iterator &I);
126    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
127    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
128  };
129  char ARMLoadStoreOpt::ID = 0;
130}
131
132static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
133  switch (Opcode) {
134  default: llvm_unreachable("Unhandled opcode!");
135  case ARM::LDRi12:
136    ++NumLDMGened;
137    switch (Mode) {
138    default: llvm_unreachable("Unhandled submode!");
139    case ARM_AM::ia: return ARM::LDMIA;
140    case ARM_AM::da: return ARM::LDMDA;
141    case ARM_AM::db: return ARM::LDMDB;
142    case ARM_AM::ib: return ARM::LDMIB;
143    }
144    break;
145  case ARM::STRi12:
146    ++NumSTMGened;
147    switch (Mode) {
148    default: llvm_unreachable("Unhandled submode!");
149    case ARM_AM::ia: return ARM::STMIA;
150    case ARM_AM::da: return ARM::STMDA;
151    case ARM_AM::db: return ARM::STMDB;
152    case ARM_AM::ib: return ARM::STMIB;
153    }
154    break;
155  case ARM::t2LDRi8:
156  case ARM::t2LDRi12:
157    ++NumLDMGened;
158    switch (Mode) {
159    default: llvm_unreachable("Unhandled submode!");
160    case ARM_AM::ia: return ARM::t2LDMIA;
161    case ARM_AM::db: return ARM::t2LDMDB;
162    }
163    break;
164  case ARM::t2STRi8:
165  case ARM::t2STRi12:
166    ++NumSTMGened;
167    switch (Mode) {
168    default: llvm_unreachable("Unhandled submode!");
169    case ARM_AM::ia: return ARM::t2STMIA;
170    case ARM_AM::db: return ARM::t2STMDB;
171    }
172    break;
173  case ARM::VLDRS:
174    ++NumVLDMGened;
175    switch (Mode) {
176    default: llvm_unreachable("Unhandled submode!");
177    case ARM_AM::ia: return ARM::VLDMSIA;
178    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
179    }
180    break;
181  case ARM::VSTRS:
182    ++NumVSTMGened;
183    switch (Mode) {
184    default: llvm_unreachable("Unhandled submode!");
185    case ARM_AM::ia: return ARM::VSTMSIA;
186    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187    }
188    break;
189  case ARM::VLDRD:
190    ++NumVLDMGened;
191    switch (Mode) {
192    default: llvm_unreachable("Unhandled submode!");
193    case ARM_AM::ia: return ARM::VLDMDIA;
194    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
195    }
196    break;
197  case ARM::VSTRD:
198    ++NumVSTMGened;
199    switch (Mode) {
200    default: llvm_unreachable("Unhandled submode!");
201    case ARM_AM::ia: return ARM::VSTMDIA;
202    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
203    }
204    break;
205  }
206
207  return 0;
208}
209
210namespace llvm {
211  namespace ARM_AM {
212
213AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
214  switch (Opcode) {
215  default: llvm_unreachable("Unhandled opcode!");
216  case ARM::LDMIA_RET:
217  case ARM::LDMIA:
218  case ARM::LDMIA_UPD:
219  case ARM::STMIA:
220  case ARM::STMIA_UPD:
221  case ARM::t2LDMIA_RET:
222  case ARM::t2LDMIA:
223  case ARM::t2LDMIA_UPD:
224  case ARM::t2STMIA:
225  case ARM::t2STMIA_UPD:
226  case ARM::VLDMSIA:
227  case ARM::VLDMSIA_UPD:
228  case ARM::VSTMSIA:
229  case ARM::VSTMSIA_UPD:
230  case ARM::VLDMDIA:
231  case ARM::VLDMDIA_UPD:
232  case ARM::VSTMDIA:
233  case ARM::VSTMDIA_UPD:
234    return ARM_AM::ia;
235
236  case ARM::LDMDA:
237  case ARM::LDMDA_UPD:
238  case ARM::STMDA:
239  case ARM::STMDA_UPD:
240    return ARM_AM::da;
241
242  case ARM::LDMDB:
243  case ARM::LDMDB_UPD:
244  case ARM::STMDB:
245  case ARM::STMDB_UPD:
246  case ARM::t2LDMDB:
247  case ARM::t2LDMDB_UPD:
248  case ARM::t2STMDB:
249  case ARM::t2STMDB_UPD:
250  case ARM::VLDMSDB_UPD:
251  case ARM::VSTMSDB_UPD:
252  case ARM::VLDMDDB_UPD:
253  case ARM::VSTMDDB_UPD:
254    return ARM_AM::db;
255
256  case ARM::LDMIB:
257  case ARM::LDMIB_UPD:
258  case ARM::STMIB:
259  case ARM::STMIB_UPD:
260    return ARM_AM::ib;
261  }
262
263  return ARM_AM::bad_am_submode;
264}
265
266  } // end namespace ARM_AM
267} // end namespace llvm
268
269static bool isT2i32Load(unsigned Opc) {
270  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
271}
272
273static bool isi32Load(unsigned Opc) {
274  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
275}
276
277static bool isT2i32Store(unsigned Opc) {
278  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
279}
280
281static bool isi32Store(unsigned Opc) {
282  return Opc == ARM::STRi12 || isT2i32Store(Opc);
283}
284
285/// MergeOps - Create and insert a LDM or STM with Base as base register and
286/// registers in Regs as the register operands that would be loaded / stored.
287/// It returns true if the transformation is done.
288bool
289ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
290                          MachineBasicBlock::iterator MBBI,
291                          int Offset, unsigned Base, bool BaseKill,
292                          int Opcode, ARMCC::CondCodes Pred,
293                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
294                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
295  // Only a single register to load / store. Don't bother.
296  unsigned NumRegs = Regs.size();
297  if (NumRegs <= 1)
298    return false;
299
300  ARM_AM::AMSubMode Mode = ARM_AM::ia;
301  // VFP and Thumb2 do not support IB or DA modes.
302  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
303  bool haveIBAndDA = isNotVFP && !isThumb2;
304  if (Offset == 4 && haveIBAndDA)
305    Mode = ARM_AM::ib;
306  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
307    Mode = ARM_AM::da;
308  else if (Offset == -4 * (int)NumRegs && isNotVFP)
309    // VLDM/VSTM do not support DB mode without also updating the base reg.
310    Mode = ARM_AM::db;
311  else if (Offset != 0) {
312    // Check if this is a supported opcode before we insert instructions to
313    // calculate a new base register.
314    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
315
316    // If starting offset isn't zero, insert a MI to materialize a new base.
317    // But only do so if it is cost effective, i.e. merging more than two
318    // loads / stores.
319    if (NumRegs <= 2)
320      return false;
321
322    unsigned NewBase;
323    if (isi32Load(Opcode))
324      // If it is a load, then just use one of the destination register to
325      // use as the new base.
326      NewBase = Regs[NumRegs-1].first;
327    else {
328      // Use the scratch register to use as a new base.
329      NewBase = Scratch;
330      if (NewBase == 0)
331        return false;
332    }
333    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
334    if (Offset < 0) {
335      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
336      Offset = - Offset;
337    }
338    int ImmedOffset = isThumb2
339      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
340    if (ImmedOffset == -1)
341      // FIXME: Try t2ADDri12 or t2SUBri12?
342      return false;  // Probably not worth it then.
343
344    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
345      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
346      .addImm(Pred).addReg(PredReg).addReg(0);
347    Base = NewBase;
348    BaseKill = true;  // New base is always killed right its use.
349  }
350
351  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
352                Opcode == ARM::VLDRD);
353  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
354  if (!Opcode) return false;
355  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
356    .addReg(Base, getKillRegState(BaseKill))
357    .addImm(Pred).addReg(PredReg);
358  for (unsigned i = 0; i != NumRegs; ++i)
359    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
360                     | getKillRegState(Regs[i].second));
361
362  return true;
363}
364
365// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
366// success.
367void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
368                                     MemOpQueue &memOps,
369                                     unsigned memOpsBegin, unsigned memOpsEnd,
370                                     unsigned insertAfter, int Offset,
371                                     unsigned Base, bool BaseKill,
372                                     int Opcode,
373                                     ARMCC::CondCodes Pred, unsigned PredReg,
374                                     unsigned Scratch,
375                                     DebugLoc dl,
376                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
377  // First calculate which of the registers should be killed by the merged
378  // instruction.
379  const unsigned insertPos = memOps[insertAfter].Position;
380  SmallSet<unsigned, 4> KilledRegs;
381  DenseMap<unsigned, unsigned> Killer;
382  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
383    if (i == memOpsBegin) {
384      i = memOpsEnd;
385      if (i == e)
386        break;
387    }
388    if (memOps[i].Position < insertPos && memOps[i].isKill) {
389      unsigned Reg = memOps[i].Reg;
390      KilledRegs.insert(Reg);
391      Killer[Reg] = i;
392    }
393  }
394
395  SmallVector<std::pair<unsigned, bool>, 8> Regs;
396  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
397    unsigned Reg = memOps[i].Reg;
398    // If we are inserting the merged operation after an operation that
399    // uses the same register, make sure to transfer any kill flag.
400    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
401    Regs.push_back(std::make_pair(Reg, isKill));
402  }
403
404  // Try to do the merge.
405  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
406  ++Loc;
407  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
408                Pred, PredReg, Scratch, dl, Regs))
409    return;
410
411  // Merge succeeded, update records.
412  Merges.push_back(prior(Loc));
413  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
414    // Remove kill flags from any memops that come before insertPos.
415    if (Regs[i-memOpsBegin].second) {
416      unsigned Reg = Regs[i-memOpsBegin].first;
417      if (KilledRegs.count(Reg)) {
418        unsigned j = Killer[Reg];
419        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
420        assert(Idx >= 0 && "Cannot find killing operand");
421        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
422        memOps[j].isKill = false;
423      }
424      memOps[i].isKill = true;
425    }
426    MBB.erase(memOps[i].MBBI);
427    // Update this memop to refer to the merged instruction.
428    // We may need to move kill flags again.
429    memOps[i].Merged = true;
430    memOps[i].MBBI = Merges.back();
431    memOps[i].Position = insertPos;
432  }
433}
434
435/// MergeLDR_STR - Merge a number of load / store instructions into one or more
436/// load / store multiple instructions.
437void
438ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
439                          unsigned Base, int Opcode, unsigned Size,
440                          ARMCC::CondCodes Pred, unsigned PredReg,
441                          unsigned Scratch, MemOpQueue &MemOps,
442                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
443  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
444  int Offset = MemOps[SIndex].Offset;
445  int SOffset = Offset;
446  unsigned insertAfter = SIndex;
447  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
448  DebugLoc dl = Loc->getDebugLoc();
449  const MachineOperand &PMO = Loc->getOperand(0);
450  unsigned PReg = PMO.getReg();
451  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
452    : getARMRegisterNumbering(PReg);
453  unsigned Count = 1;
454  unsigned Limit = ~0U;
455
456  // vldm / vstm limit are 32 for S variants, 16 for D variants.
457
458  switch (Opcode) {
459  default: break;
460  case ARM::VSTRS:
461    Limit = 32;
462    break;
463  case ARM::VSTRD:
464    Limit = 16;
465    break;
466  case ARM::VLDRD:
467    Limit = 16;
468    break;
469  case ARM::VLDRS:
470    Limit = 32;
471    break;
472  }
473
474  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
475    int NewOffset = MemOps[i].Offset;
476    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
477    unsigned Reg = MO.getReg();
478    unsigned RegNum = MO.isUndef() ? UINT_MAX
479      : getARMRegisterNumbering(Reg);
480    // Register numbers must be in ascending order. For VFP / NEON load and
481    // store multiples, the registers must also be consecutive and within the
482    // limit on the number of registers per instruction.
483    if (Reg != ARM::SP &&
484        NewOffset == Offset + (int)Size &&
485        ((isNotVFP && RegNum > PRegNum) ||
486         ((Count < Limit) && RegNum == PRegNum+1))) {
487      Offset += Size;
488      PRegNum = RegNum;
489      ++Count;
490    } else {
491      // Can't merge this in. Try merge the earlier ones first.
492      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
493                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
494      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
495                   MemOps, Merges);
496      return;
497    }
498
499    if (MemOps[i].Position > MemOps[insertAfter].Position)
500      insertAfter = i;
501  }
502
503  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
504  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
505                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
506  return;
507}
508
509static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
510                                       unsigned Bytes, unsigned Limit,
511                                       ARMCC::CondCodes Pred, unsigned PredReg){
512  unsigned MyPredReg = 0;
513  if (!MI)
514    return false;
515  if (MI->getOpcode() != ARM::t2SUBri &&
516      MI->getOpcode() != ARM::tSUBspi &&
517      MI->getOpcode() != ARM::SUBri)
518    return false;
519
520  // Make sure the offset fits in 8 bits.
521  if (Bytes == 0 || (Limit && Bytes >= Limit))
522    return false;
523
524  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
525  return (MI->getOperand(0).getReg() == Base &&
526          MI->getOperand(1).getReg() == Base &&
527          (MI->getOperand(2).getImm()*Scale) == Bytes &&
528          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
529          MyPredReg == PredReg);
530}
531
532static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
533                                       unsigned Bytes, unsigned Limit,
534                                       ARMCC::CondCodes Pred, unsigned PredReg){
535  unsigned MyPredReg = 0;
536  if (!MI)
537    return false;
538  if (MI->getOpcode() != ARM::t2ADDri &&
539      MI->getOpcode() != ARM::tADDspi &&
540      MI->getOpcode() != ARM::ADDri)
541    return false;
542
543  if (Bytes == 0 || (Limit && Bytes >= Limit))
544    // Make sure the offset fits in 8 bits.
545    return false;
546
547  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
548  return (MI->getOperand(0).getReg() == Base &&
549          MI->getOperand(1).getReg() == Base &&
550          (MI->getOperand(2).getImm()*Scale) == Bytes &&
551          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
552          MyPredReg == PredReg);
553}
554
555static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
556  switch (MI->getOpcode()) {
557  default: return 0;
558  case ARM::LDRi12:
559  case ARM::STRi12:
560  case ARM::t2LDRi8:
561  case ARM::t2LDRi12:
562  case ARM::t2STRi8:
563  case ARM::t2STRi12:
564  case ARM::VLDRS:
565  case ARM::VSTRS:
566    return 4;
567  case ARM::VLDRD:
568  case ARM::VSTRD:
569    return 8;
570  case ARM::LDMIA:
571  case ARM::LDMDA:
572  case ARM::LDMDB:
573  case ARM::LDMIB:
574  case ARM::STMIA:
575  case ARM::STMDA:
576  case ARM::STMDB:
577  case ARM::STMIB:
578  case ARM::t2LDMIA:
579  case ARM::t2LDMDB:
580  case ARM::t2STMIA:
581  case ARM::t2STMDB:
582  case ARM::VLDMSIA:
583  case ARM::VSTMSIA:
584    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
585  case ARM::VLDMDIA:
586  case ARM::VSTMDIA:
587    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
588  }
589}
590
591static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
592                                            ARM_AM::AMSubMode Mode) {
593  switch (Opc) {
594  default: llvm_unreachable("Unhandled opcode!");
595  case ARM::LDMIA:
596  case ARM::LDMDA:
597  case ARM::LDMDB:
598  case ARM::LDMIB:
599    switch (Mode) {
600    default: llvm_unreachable("Unhandled submode!");
601    case ARM_AM::ia: return ARM::LDMIA_UPD;
602    case ARM_AM::ib: return ARM::LDMIB_UPD;
603    case ARM_AM::da: return ARM::LDMDA_UPD;
604    case ARM_AM::db: return ARM::LDMDB_UPD;
605    }
606    break;
607  case ARM::STMIA:
608  case ARM::STMDA:
609  case ARM::STMDB:
610  case ARM::STMIB:
611    switch (Mode) {
612    default: llvm_unreachable("Unhandled submode!");
613    case ARM_AM::ia: return ARM::STMIA_UPD;
614    case ARM_AM::ib: return ARM::STMIB_UPD;
615    case ARM_AM::da: return ARM::STMDA_UPD;
616    case ARM_AM::db: return ARM::STMDB_UPD;
617    }
618    break;
619  case ARM::t2LDMIA:
620  case ARM::t2LDMDB:
621    switch (Mode) {
622    default: llvm_unreachable("Unhandled submode!");
623    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
624    case ARM_AM::db: return ARM::t2LDMDB_UPD;
625    }
626    break;
627  case ARM::t2STMIA:
628  case ARM::t2STMDB:
629    switch (Mode) {
630    default: llvm_unreachable("Unhandled submode!");
631    case ARM_AM::ia: return ARM::t2STMIA_UPD;
632    case ARM_AM::db: return ARM::t2STMDB_UPD;
633    }
634    break;
635  case ARM::VLDMSIA:
636    switch (Mode) {
637    default: llvm_unreachable("Unhandled submode!");
638    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
639    case ARM_AM::db: return ARM::VLDMSDB_UPD;
640    }
641    break;
642  case ARM::VLDMDIA:
643    switch (Mode) {
644    default: llvm_unreachable("Unhandled submode!");
645    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
646    case ARM_AM::db: return ARM::VLDMDDB_UPD;
647    }
648    break;
649  case ARM::VSTMSIA:
650    switch (Mode) {
651    default: llvm_unreachable("Unhandled submode!");
652    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
653    case ARM_AM::db: return ARM::VSTMSDB_UPD;
654    }
655    break;
656  case ARM::VSTMDIA:
657    switch (Mode) {
658    default: llvm_unreachable("Unhandled submode!");
659    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
660    case ARM_AM::db: return ARM::VSTMDDB_UPD;
661    }
662    break;
663  }
664
665  return 0;
666}
667
668/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
669/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
670///
671/// stmia rn, <ra, rb, rc>
672/// rn := rn + 4 * 3;
673/// =>
674/// stmia rn!, <ra, rb, rc>
675///
676/// rn := rn - 4 * 3;
677/// ldmia rn, <ra, rb, rc>
678/// =>
679/// ldmdb rn!, <ra, rb, rc>
680bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
681                                               MachineBasicBlock::iterator MBBI,
682                                               bool &Advance,
683                                               MachineBasicBlock::iterator &I) {
684  MachineInstr *MI = MBBI;
685  unsigned Base = MI->getOperand(0).getReg();
686  bool BaseKill = MI->getOperand(0).isKill();
687  unsigned Bytes = getLSMultipleTransferSize(MI);
688  unsigned PredReg = 0;
689  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
690  int Opcode = MI->getOpcode();
691  DebugLoc dl = MI->getDebugLoc();
692
693  // Can't use an updating ld/st if the base register is also a dest
694  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
695  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
696    if (MI->getOperand(i).getReg() == Base)
697      return false;
698
699  bool DoMerge = false;
700  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
701
702  // Try merging with the previous instruction.
703  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
704  if (MBBI != BeginMBBI) {
705    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
706    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
707      --PrevMBBI;
708    if (Mode == ARM_AM::ia &&
709        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
710      Mode = ARM_AM::db;
711      DoMerge = true;
712    } else if (Mode == ARM_AM::ib &&
713               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
714      Mode = ARM_AM::da;
715      DoMerge = true;
716    }
717    if (DoMerge)
718      MBB.erase(PrevMBBI);
719  }
720
721  // Try merging with the next instruction.
722  MachineBasicBlock::iterator EndMBBI = MBB.end();
723  if (!DoMerge && MBBI != EndMBBI) {
724    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
725    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
726      ++NextMBBI;
727    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
728        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
729      DoMerge = true;
730    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
731               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
732      DoMerge = true;
733    }
734    if (DoMerge) {
735      if (NextMBBI == I) {
736        Advance = true;
737        ++I;
738      }
739      MBB.erase(NextMBBI);
740    }
741  }
742
743  if (!DoMerge)
744    return false;
745
746  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
747  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
748    .addReg(Base, getDefRegState(true)) // WB base register
749    .addReg(Base, getKillRegState(BaseKill))
750    .addImm(Pred).addReg(PredReg);
751
752  // Transfer the rest of operands.
753  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
754    MIB.addOperand(MI->getOperand(OpNum));
755
756  // Transfer memoperands.
757  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
758
759  MBB.erase(MBBI);
760  return true;
761}
762
763static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
764                                             ARM_AM::AddrOpc Mode) {
765  switch (Opc) {
766  case ARM::LDRi12:
767    return ARM::LDR_PRE_IMM;
768  case ARM::STRi12:
769    return ARM::STR_PRE_IMM;
770  case ARM::VLDRS:
771    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
772  case ARM::VLDRD:
773    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
774  case ARM::VSTRS:
775    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
776  case ARM::VSTRD:
777    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
778  case ARM::t2LDRi8:
779  case ARM::t2LDRi12:
780    return ARM::t2LDR_PRE;
781  case ARM::t2STRi8:
782  case ARM::t2STRi12:
783    return ARM::t2STR_PRE;
784  default: llvm_unreachable("Unhandled opcode!");
785  }
786  return 0;
787}
788
789static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
790                                              ARM_AM::AddrOpc Mode) {
791  switch (Opc) {
792  case ARM::LDRi12:
793    return ARM::LDR_POST_IMM;
794  case ARM::STRi12:
795    return ARM::STR_POST_IMM;
796  case ARM::VLDRS:
797    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
798  case ARM::VLDRD:
799    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
800  case ARM::VSTRS:
801    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
802  case ARM::VSTRD:
803    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
804  case ARM::t2LDRi8:
805  case ARM::t2LDRi12:
806    return ARM::t2LDR_POST;
807  case ARM::t2STRi8:
808  case ARM::t2STRi12:
809    return ARM::t2STR_POST;
810  default: llvm_unreachable("Unhandled opcode!");
811  }
812  return 0;
813}
814
815/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
816/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
817bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
818                                               MachineBasicBlock::iterator MBBI,
819                                               const TargetInstrInfo *TII,
820                                               bool &Advance,
821                                               MachineBasicBlock::iterator &I) {
822  MachineInstr *MI = MBBI;
823  unsigned Base = MI->getOperand(1).getReg();
824  bool BaseKill = MI->getOperand(1).isKill();
825  unsigned Bytes = getLSMultipleTransferSize(MI);
826  int Opcode = MI->getOpcode();
827  DebugLoc dl = MI->getDebugLoc();
828  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
829                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
830  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
831  if (isi32Load(Opcode) || isi32Store(Opcode))
832    if (MI->getOperand(2).getImm() != 0)
833      return false;
834  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
835    return false;
836
837  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
838  // Can't do the merge if the destination register is the same as the would-be
839  // writeback register.
840  if (isLd && MI->getOperand(0).getReg() == Base)
841    return false;
842
843  unsigned PredReg = 0;
844  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
845  bool DoMerge = false;
846  ARM_AM::AddrOpc AddSub = ARM_AM::add;
847  unsigned NewOpc = 0;
848  // AM2 - 12 bits, thumb2 - 8 bits.
849  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
850
851  // Try merging with the previous instruction.
852  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
853  if (MBBI != BeginMBBI) {
854    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
855    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
856      --PrevMBBI;
857    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
858      DoMerge = true;
859      AddSub = ARM_AM::sub;
860    } else if (!isAM5 &&
861               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
862      DoMerge = true;
863    }
864    if (DoMerge) {
865      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
866      MBB.erase(PrevMBBI);
867    }
868  }
869
870  // Try merging with the next instruction.
871  MachineBasicBlock::iterator EndMBBI = MBB.end();
872  if (!DoMerge && MBBI != EndMBBI) {
873    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
874    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
875      ++NextMBBI;
876    if (!isAM5 &&
877        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
878      DoMerge = true;
879      AddSub = ARM_AM::sub;
880    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
881      DoMerge = true;
882    }
883    if (DoMerge) {
884      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
885      if (NextMBBI == I) {
886        Advance = true;
887        ++I;
888      }
889      MBB.erase(NextMBBI);
890    }
891  }
892
893  if (!DoMerge)
894    return false;
895
896  if (isAM5) {
897    // VLDM[SD}_UPD, VSTM[SD]_UPD
898    // (There are no base-updating versions of VLDR/VSTR instructions, but the
899    // updating load/store-multiple instructions can be used with only one
900    // register.)
901    MachineOperand &MO = MI->getOperand(0);
902    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
903      .addReg(Base, getDefRegState(true)) // WB base register
904      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
905      .addImm(Pred).addReg(PredReg)
906      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
907                            getKillRegState(MO.isKill())));
908  } else if (isLd) {
909    if (isAM2) {
910      // LDR_PRE, LDR_POST
911      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
912        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
913        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
914          .addReg(Base, RegState::Define)
915          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
916      } else {
917        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
918        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
919          .addReg(Base, RegState::Define)
920          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
921      }
922    } else {
923      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
924      // t2LDR_PRE, t2LDR_POST
925      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
926        .addReg(Base, RegState::Define)
927        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
928    }
929  } else {
930    MachineOperand &MO = MI->getOperand(0);
931    // FIXME: post-indexed stores use am2offset_imm, which still encodes
932    // the vestigal zero-reg offset register. When that's fixed, this clause
933    // can be removed entirely.
934    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
935      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
936      // STR_PRE, STR_POST
937      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
938        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
939        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
940    } else {
941      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
942      // t2STR_PRE, t2STR_POST
943      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
944        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
945        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
946    }
947  }
948  MBB.erase(MBBI);
949
950  return true;
951}
952
953/// isMemoryOp - Returns true if instruction is a memory operation that this
954/// pass is capable of operating on.
955static bool isMemoryOp(const MachineInstr *MI) {
956  // When no memory operands are present, conservatively assume unaligned,
957  // volatile, unfoldable.
958  if (!MI->hasOneMemOperand())
959    return false;
960
961  const MachineMemOperand *MMO = *MI->memoperands_begin();
962
963  // Don't touch volatile memory accesses - we may be changing their order.
964  if (MMO->isVolatile())
965    return false;
966
967  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
968  // not.
969  if (MMO->getAlignment() < 4)
970    return false;
971
972  // str <undef> could probably be eliminated entirely, but for now we just want
973  // to avoid making a mess of it.
974  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
975  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
976      MI->getOperand(0).isUndef())
977    return false;
978
979  // Likewise don't mess with references to undefined addresses.
980  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
981      MI->getOperand(1).isUndef())
982    return false;
983
984  int Opcode = MI->getOpcode();
985  switch (Opcode) {
986  default: break;
987  case ARM::VLDRS:
988  case ARM::VSTRS:
989    return MI->getOperand(1).isReg();
990  case ARM::VLDRD:
991  case ARM::VSTRD:
992    return MI->getOperand(1).isReg();
993  case ARM::LDRi12:
994  case ARM::STRi12:
995  case ARM::t2LDRi8:
996  case ARM::t2LDRi12:
997  case ARM::t2STRi8:
998  case ARM::t2STRi12:
999    return MI->getOperand(1).isReg();
1000  }
1001  return false;
1002}
1003
1004/// AdvanceRS - Advance register scavenger to just before the earliest memory
1005/// op that is being merged.
1006void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1007  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1008  unsigned Position = MemOps[0].Position;
1009  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1010    if (MemOps[i].Position < Position) {
1011      Position = MemOps[i].Position;
1012      Loc = MemOps[i].MBBI;
1013    }
1014  }
1015
1016  if (Loc != MBB.begin())
1017    RS->forward(prior(Loc));
1018}
1019
1020static int getMemoryOpOffset(const MachineInstr *MI) {
1021  int Opcode = MI->getOpcode();
1022  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1023  unsigned NumOperands = MI->getDesc().getNumOperands();
1024  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1025
1026  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1027      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1028      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1029      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1030    return OffField;
1031
1032  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1033    : ARM_AM::getAM5Offset(OffField) * 4;
1034  if (isAM3) {
1035    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1036      Offset = -Offset;
1037  } else {
1038    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1039      Offset = -Offset;
1040  }
1041  return Offset;
1042}
1043
1044static void InsertLDR_STR(MachineBasicBlock &MBB,
1045                          MachineBasicBlock::iterator &MBBI,
1046                          int Offset, bool isDef,
1047                          DebugLoc dl, unsigned NewOpc,
1048                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1049                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1050                          bool OffKill, bool OffUndef,
1051                          ARMCC::CondCodes Pred, unsigned PredReg,
1052                          const TargetInstrInfo *TII, bool isT2) {
1053  if (isDef) {
1054    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1055                                      TII->get(NewOpc))
1056      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1057      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1058    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1059  } else {
1060    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1061                                      TII->get(NewOpc))
1062      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1063      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1064    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1065  }
1066}
1067
1068bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1069                                          MachineBasicBlock::iterator &MBBI) {
1070  MachineInstr *MI = &*MBBI;
1071  unsigned Opcode = MI->getOpcode();
1072  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1073      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1074    unsigned EvenReg = MI->getOperand(0).getReg();
1075    unsigned OddReg  = MI->getOperand(1).getReg();
1076    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1077    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1078    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
1079      return false;
1080
1081    MachineBasicBlock::iterator NewBBI = MBBI;
1082    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1083    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1084    bool EvenDeadKill = isLd ?
1085      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1086    bool EvenUndef = MI->getOperand(0).isUndef();
1087    bool OddDeadKill  = isLd ?
1088      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1089    bool OddUndef = MI->getOperand(1).isUndef();
1090    const MachineOperand &BaseOp = MI->getOperand(2);
1091    unsigned BaseReg = BaseOp.getReg();
1092    bool BaseKill = BaseOp.isKill();
1093    bool BaseUndef = BaseOp.isUndef();
1094    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1095    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1096    int OffImm = getMemoryOpOffset(MI);
1097    unsigned PredReg = 0;
1098    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
1099
1100    if (OddRegNum > EvenRegNum && OffImm == 0) {
1101      // Ascending register numbers and no offset. It's safe to change it to a
1102      // ldm or stm.
1103      unsigned NewOpc = (isLd)
1104        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1105        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1106      if (isLd) {
1107        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1108          .addReg(BaseReg, getKillRegState(BaseKill))
1109          .addImm(Pred).addReg(PredReg)
1110          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1111          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1112        ++NumLDRD2LDM;
1113      } else {
1114        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1115          .addReg(BaseReg, getKillRegState(BaseKill))
1116          .addImm(Pred).addReg(PredReg)
1117          .addReg(EvenReg,
1118                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1119          .addReg(OddReg,
1120                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1121        ++NumSTRD2STM;
1122      }
1123      NewBBI = llvm::prior(MBBI);
1124    } else {
1125      // Split into two instructions.
1126      unsigned NewOpc = (isLd)
1127        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1128        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1129      DebugLoc dl = MBBI->getDebugLoc();
1130      // If this is a load and base register is killed, it may have been
1131      // re-defed by the load, make sure the first load does not clobber it.
1132      if (isLd &&
1133          (BaseKill || OffKill) &&
1134          (TRI->regsOverlap(EvenReg, BaseReg))) {
1135        assert(!TRI->regsOverlap(OddReg, BaseReg));
1136        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1137                      OddReg, OddDeadKill, false,
1138                      BaseReg, false, BaseUndef, false, OffUndef,
1139                      Pred, PredReg, TII, isT2);
1140        NewBBI = llvm::prior(MBBI);
1141        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1142                      EvenReg, EvenDeadKill, false,
1143                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1144                      Pred, PredReg, TII, isT2);
1145      } else {
1146        if (OddReg == EvenReg && EvenDeadKill) {
1147          // If the two source operands are the same, the kill marker is
1148          // probably on the first one. e.g.
1149          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1150          EvenDeadKill = false;
1151          OddDeadKill = true;
1152        }
1153        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1154                      EvenReg, EvenDeadKill, EvenUndef,
1155                      BaseReg, false, BaseUndef, false, OffUndef,
1156                      Pred, PredReg, TII, isT2);
1157        NewBBI = llvm::prior(MBBI);
1158        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
1159                      OddReg, OddDeadKill, OddUndef,
1160                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1161                      Pred, PredReg, TII, isT2);
1162      }
1163      if (isLd)
1164        ++NumLDRD2LDR;
1165      else
1166        ++NumSTRD2STR;
1167    }
1168
1169    MBB.erase(MI);
1170    MBBI = NewBBI;
1171    return true;
1172  }
1173  return false;
1174}
1175
1176/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1177/// ops of the same base and incrementing offset into LDM / STM ops.
1178bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1179  unsigned NumMerges = 0;
1180  unsigned NumMemOps = 0;
1181  MemOpQueue MemOps;
1182  unsigned CurrBase = 0;
1183  int CurrOpc = -1;
1184  unsigned CurrSize = 0;
1185  ARMCC::CondCodes CurrPred = ARMCC::AL;
1186  unsigned CurrPredReg = 0;
1187  unsigned Position = 0;
1188  SmallVector<MachineBasicBlock::iterator,4> Merges;
1189
1190  RS->enterBasicBlock(&MBB);
1191  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1192  while (MBBI != E) {
1193    if (FixInvalidRegPairOp(MBB, MBBI))
1194      continue;
1195
1196    bool Advance  = false;
1197    bool TryMerge = false;
1198    bool Clobber  = false;
1199
1200    bool isMemOp = isMemoryOp(MBBI);
1201    if (isMemOp) {
1202      int Opcode = MBBI->getOpcode();
1203      unsigned Size = getLSMultipleTransferSize(MBBI);
1204      const MachineOperand &MO = MBBI->getOperand(0);
1205      unsigned Reg = MO.getReg();
1206      bool isKill = MO.isDef() ? false : MO.isKill();
1207      unsigned Base = MBBI->getOperand(1).getReg();
1208      unsigned PredReg = 0;
1209      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
1210      int Offset = getMemoryOpOffset(MBBI);
1211      // Watch out for:
1212      // r4 := ldr [r5]
1213      // r5 := ldr [r5, #4]
1214      // r6 := ldr [r5, #8]
1215      //
1216      // The second ldr has effectively broken the chain even though it
1217      // looks like the later ldr(s) use the same base register. Try to
1218      // merge the ldr's so far, including this one. But don't try to
1219      // combine the following ldr(s).
1220      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1221      if (CurrBase == 0 && !Clobber) {
1222        // Start of a new chain.
1223        CurrBase = Base;
1224        CurrOpc  = Opcode;
1225        CurrSize = Size;
1226        CurrPred = Pred;
1227        CurrPredReg = PredReg;
1228        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1229        ++NumMemOps;
1230        Advance = true;
1231      } else {
1232        if (Clobber) {
1233          TryMerge = true;
1234          Advance = true;
1235        }
1236
1237        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1238          // No need to match PredReg.
1239          // Continue adding to the queue.
1240          if (Offset > MemOps.back().Offset) {
1241            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1242                                             Position, MBBI));
1243            ++NumMemOps;
1244            Advance = true;
1245          } else {
1246            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1247                 I != E; ++I) {
1248              if (Offset < I->Offset) {
1249                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1250                                                 Position, MBBI));
1251                ++NumMemOps;
1252                Advance = true;
1253                break;
1254              } else if (Offset == I->Offset) {
1255                // Collision! This can't be merged!
1256                break;
1257              }
1258            }
1259          }
1260        }
1261      }
1262    }
1263
1264    if (MBBI->isDebugValue()) {
1265      ++MBBI;
1266      if (MBBI == E)
1267        // Reach the end of the block, try merging the memory instructions.
1268        TryMerge = true;
1269    } else if (Advance) {
1270      ++Position;
1271      ++MBBI;
1272      if (MBBI == E)
1273        // Reach the end of the block, try merging the memory instructions.
1274        TryMerge = true;
1275    } else
1276      TryMerge = true;
1277
1278    if (TryMerge) {
1279      if (NumMemOps > 1) {
1280        // Try to find a free register to use as a new base in case it's needed.
1281        // First advance to the instruction just before the start of the chain.
1282        AdvanceRS(MBB, MemOps);
1283        // Find a scratch register.
1284        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
1285        // Process the load / store instructions.
1286        RS->forward(prior(MBBI));
1287
1288        // Merge ops.
1289        Merges.clear();
1290        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1291                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1292
1293        // Try folding preceding/trailing base inc/dec into the generated
1294        // LDM/STM ops.
1295        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1296          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1297            ++NumMerges;
1298        NumMerges += Merges.size();
1299
1300        // Try folding preceding/trailing base inc/dec into those load/store
1301        // that were not merged to form LDM/STM ops.
1302        for (unsigned i = 0; i != NumMemOps; ++i)
1303          if (!MemOps[i].Merged)
1304            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1305              ++NumMerges;
1306
1307        // RS may be pointing to an instruction that's deleted.
1308        RS->skipTo(prior(MBBI));
1309      } else if (NumMemOps == 1) {
1310        // Try folding preceding/trailing base inc/dec into the single
1311        // load/store.
1312        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1313          ++NumMerges;
1314          RS->forward(prior(MBBI));
1315        }
1316      }
1317
1318      CurrBase = 0;
1319      CurrOpc = -1;
1320      CurrSize = 0;
1321      CurrPred = ARMCC::AL;
1322      CurrPredReg = 0;
1323      if (NumMemOps) {
1324        MemOps.clear();
1325        NumMemOps = 0;
1326      }
1327
1328      // If iterator hasn't been advanced and this is not a memory op, skip it.
1329      // It can't start a new chain anyway.
1330      if (!Advance && !isMemOp && MBBI != E) {
1331        ++Position;
1332        ++MBBI;
1333      }
1334    }
1335  }
1336  return NumMerges > 0;
1337}
1338
1339/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1340/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1341/// directly restore the value of LR into pc.
1342///   ldmfd sp!, {..., lr}
1343///   bx lr
1344/// or
1345///   ldmfd sp!, {..., lr}
1346///   mov pc, lr
1347/// =>
1348///   ldmfd sp!, {..., pc}
1349bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1350  if (MBB.empty()) return false;
1351
1352  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1353  if (MBBI != MBB.begin() &&
1354      (MBBI->getOpcode() == ARM::BX_RET ||
1355       MBBI->getOpcode() == ARM::tBX_RET ||
1356       MBBI->getOpcode() == ARM::MOVPCLR)) {
1357    MachineInstr *PrevMI = prior(MBBI);
1358    unsigned Opcode = PrevMI->getOpcode();
1359    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1360        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1361        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1362      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1363      if (MO.getReg() != ARM::LR)
1364        return false;
1365      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1366      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1367              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1368      PrevMI->setDesc(TII->get(NewOpc));
1369      MO.setReg(ARM::PC);
1370      PrevMI->copyImplicitOps(&*MBBI);
1371      MBB.erase(MBBI);
1372      return true;
1373    }
1374  }
1375  return false;
1376}
1377
1378bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1379  const TargetMachine &TM = Fn.getTarget();
1380  AFI = Fn.getInfo<ARMFunctionInfo>();
1381  TII = TM.getInstrInfo();
1382  TRI = TM.getRegisterInfo();
1383  RS = new RegScavenger();
1384  isThumb2 = AFI->isThumb2Function();
1385
1386  bool Modified = false;
1387  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1388       ++MFI) {
1389    MachineBasicBlock &MBB = *MFI;
1390    Modified |= LoadStoreMultipleOpti(MBB);
1391    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1392      Modified |= MergeReturnIntoLDM(MBB);
1393  }
1394
1395  delete RS;
1396  return Modified;
1397}
1398
1399
1400/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1401/// load / stores from consecutive locations close to make it more
1402/// likely they will be combined later.
1403
1404namespace {
1405  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1406    static char ID;
1407    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1408
1409    const TargetData *TD;
1410    const TargetInstrInfo *TII;
1411    const TargetRegisterInfo *TRI;
1412    const ARMSubtarget *STI;
1413    MachineRegisterInfo *MRI;
1414    MachineFunction *MF;
1415
1416    virtual bool runOnMachineFunction(MachineFunction &Fn);
1417
1418    virtual const char *getPassName() const {
1419      return "ARM pre- register allocation load / store optimization pass";
1420    }
1421
1422  private:
1423    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1424                          unsigned &NewOpc, unsigned &EvenReg,
1425                          unsigned &OddReg, unsigned &BaseReg,
1426                          int &Offset,
1427                          unsigned &PredReg, ARMCC::CondCodes &Pred,
1428                          bool &isT2);
1429    bool RescheduleOps(MachineBasicBlock *MBB,
1430                       SmallVector<MachineInstr*, 4> &Ops,
1431                       unsigned Base, bool isLd,
1432                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1433    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1434  };
1435  char ARMPreAllocLoadStoreOpt::ID = 0;
1436}
1437
1438bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1439  TD  = Fn.getTarget().getTargetData();
1440  TII = Fn.getTarget().getInstrInfo();
1441  TRI = Fn.getTarget().getRegisterInfo();
1442  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1443  MRI = &Fn.getRegInfo();
1444  MF  = &Fn;
1445
1446  bool Modified = false;
1447  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1448       ++MFI)
1449    Modified |= RescheduleLoadStoreInstrs(MFI);
1450
1451  return Modified;
1452}
1453
1454static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1455                                      MachineBasicBlock::iterator I,
1456                                      MachineBasicBlock::iterator E,
1457                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1458                                      SmallSet<unsigned, 4> &MemRegs,
1459                                      const TargetRegisterInfo *TRI) {
1460  // Are there stores / loads / calls between them?
1461  // FIXME: This is overly conservative. We should make use of alias information
1462  // some day.
1463  SmallSet<unsigned, 4> AddedRegPressure;
1464  while (++I != E) {
1465    if (I->isDebugValue() || MemOps.count(&*I))
1466      continue;
1467    const MCInstrDesc &MCID = I->getDesc();
1468    if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
1469      return false;
1470    if (isLd && MCID.mayStore())
1471      return false;
1472    if (!isLd) {
1473      if (MCID.mayLoad())
1474        return false;
1475      // It's not safe to move the first 'str' down.
1476      // str r1, [r0]
1477      // strh r5, [r0]
1478      // str r4, [r0, #+4]
1479      if (MCID.mayStore())
1480        return false;
1481    }
1482    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1483      MachineOperand &MO = I->getOperand(j);
1484      if (!MO.isReg())
1485        continue;
1486      unsigned Reg = MO.getReg();
1487      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1488        return false;
1489      if (Reg != Base && !MemRegs.count(Reg))
1490        AddedRegPressure.insert(Reg);
1491    }
1492  }
1493
1494  // Estimate register pressure increase due to the transformation.
1495  if (MemRegs.size() <= 4)
1496    // Ok if we are moving small number of instructions.
1497    return true;
1498  return AddedRegPressure.size() <= MemRegs.size() * 2;
1499}
1500
1501bool
1502ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1503                                          DebugLoc &dl,
1504                                          unsigned &NewOpc, unsigned &EvenReg,
1505                                          unsigned &OddReg, unsigned &BaseReg,
1506                                          int &Offset, unsigned &PredReg,
1507                                          ARMCC::CondCodes &Pred,
1508                                          bool &isT2) {
1509  // Make sure we're allowed to generate LDRD/STRD.
1510  if (!STI->hasV5TEOps())
1511    return false;
1512
1513  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1514  unsigned Scale = 1;
1515  unsigned Opcode = Op0->getOpcode();
1516  if (Opcode == ARM::LDRi12)
1517    NewOpc = ARM::LDRD;
1518  else if (Opcode == ARM::STRi12)
1519    NewOpc = ARM::STRD;
1520  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1521    NewOpc = ARM::t2LDRDi8;
1522    Scale = 4;
1523    isT2 = true;
1524  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1525    NewOpc = ARM::t2STRDi8;
1526    Scale = 4;
1527    isT2 = true;
1528  } else
1529    return false;
1530
1531  // Make sure the base address satisfies i64 ld / st alignment requirement.
1532  if (!Op0->hasOneMemOperand() ||
1533      !(*Op0->memoperands_begin())->getValue() ||
1534      (*Op0->memoperands_begin())->isVolatile())
1535    return false;
1536
1537  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1538  const Function *Func = MF->getFunction();
1539  unsigned ReqAlign = STI->hasV6Ops()
1540    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1541    : 8;  // Pre-v6 need 8-byte align
1542  if (Align < ReqAlign)
1543    return false;
1544
1545  // Then make sure the immediate offset fits.
1546  int OffImm = getMemoryOpOffset(Op0);
1547  if (isT2) {
1548    int Limit = (1 << 8) * Scale;
1549    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1550      return false;
1551    Offset = OffImm;
1552  } else {
1553    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1554    if (OffImm < 0) {
1555      AddSub = ARM_AM::sub;
1556      OffImm = - OffImm;
1557    }
1558    int Limit = (1 << 8) * Scale;
1559    if (OffImm >= Limit || (OffImm & (Scale-1)))
1560      return false;
1561    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1562  }
1563  EvenReg = Op0->getOperand(0).getReg();
1564  OddReg  = Op1->getOperand(0).getReg();
1565  if (EvenReg == OddReg)
1566    return false;
1567  BaseReg = Op0->getOperand(1).getReg();
1568  Pred = llvm::getInstrPredicate(Op0, PredReg);
1569  dl = Op0->getDebugLoc();
1570  return true;
1571}
1572
1573namespace {
1574  struct OffsetCompare {
1575    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1576      int LOffset = getMemoryOpOffset(LHS);
1577      int ROffset = getMemoryOpOffset(RHS);
1578      assert(LHS == RHS || LOffset != ROffset);
1579      return LOffset > ROffset;
1580    }
1581  };
1582}
1583
1584bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1585                                 SmallVector<MachineInstr*, 4> &Ops,
1586                                 unsigned Base, bool isLd,
1587                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1588  bool RetVal = false;
1589
1590  // Sort by offset (in reverse order).
1591  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1592
1593  // The loads / stores of the same base are in order. Scan them from first to
1594  // last and check for the following:
1595  // 1. Any def of base.
1596  // 2. Any gaps.
1597  while (Ops.size() > 1) {
1598    unsigned FirstLoc = ~0U;
1599    unsigned LastLoc = 0;
1600    MachineInstr *FirstOp = 0;
1601    MachineInstr *LastOp = 0;
1602    int LastOffset = 0;
1603    unsigned LastOpcode = 0;
1604    unsigned LastBytes = 0;
1605    unsigned NumMove = 0;
1606    for (int i = Ops.size() - 1; i >= 0; --i) {
1607      MachineInstr *Op = Ops[i];
1608      unsigned Loc = MI2LocMap[Op];
1609      if (Loc <= FirstLoc) {
1610        FirstLoc = Loc;
1611        FirstOp = Op;
1612      }
1613      if (Loc >= LastLoc) {
1614        LastLoc = Loc;
1615        LastOp = Op;
1616      }
1617
1618      unsigned Opcode = Op->getOpcode();
1619      if (LastOpcode && Opcode != LastOpcode)
1620        break;
1621
1622      int Offset = getMemoryOpOffset(Op);
1623      unsigned Bytes = getLSMultipleTransferSize(Op);
1624      if (LastBytes) {
1625        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1626          break;
1627      }
1628      LastOffset = Offset;
1629      LastBytes = Bytes;
1630      LastOpcode = Opcode;
1631      if (++NumMove == 8) // FIXME: Tune this limit.
1632        break;
1633    }
1634
1635    if (NumMove <= 1)
1636      Ops.pop_back();
1637    else {
1638      SmallPtrSet<MachineInstr*, 4> MemOps;
1639      SmallSet<unsigned, 4> MemRegs;
1640      for (int i = NumMove-1; i >= 0; --i) {
1641        MemOps.insert(Ops[i]);
1642        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1643      }
1644
1645      // Be conservative, if the instructions are too far apart, don't
1646      // move them. We want to limit the increase of register pressure.
1647      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1648      if (DoMove)
1649        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1650                                           MemOps, MemRegs, TRI);
1651      if (!DoMove) {
1652        for (unsigned i = 0; i != NumMove; ++i)
1653          Ops.pop_back();
1654      } else {
1655        // This is the new location for the loads / stores.
1656        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1657        while (InsertPos != MBB->end()
1658               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1659          ++InsertPos;
1660
1661        // If we are moving a pair of loads / stores, see if it makes sense
1662        // to try to allocate a pair of registers that can form register pairs.
1663        MachineInstr *Op0 = Ops.back();
1664        MachineInstr *Op1 = Ops[Ops.size()-2];
1665        unsigned EvenReg = 0, OddReg = 0;
1666        unsigned BaseReg = 0, PredReg = 0;
1667        ARMCC::CondCodes Pred = ARMCC::AL;
1668        bool isT2 = false;
1669        unsigned NewOpc = 0;
1670        int Offset = 0;
1671        DebugLoc dl;
1672        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1673                                             EvenReg, OddReg, BaseReg,
1674                                             Offset, PredReg, Pred, isT2)) {
1675          Ops.pop_back();
1676          Ops.pop_back();
1677
1678          const MCInstrDesc &MCID = TII->get(NewOpc);
1679          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
1680          MRI->constrainRegClass(EvenReg, TRC);
1681          MRI->constrainRegClass(OddReg, TRC);
1682
1683          // Form the pair instruction.
1684          if (isLd) {
1685            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1686              .addReg(EvenReg, RegState::Define)
1687              .addReg(OddReg, RegState::Define)
1688              .addReg(BaseReg);
1689            // FIXME: We're converting from LDRi12 to an insn that still
1690            // uses addrmode2, so we need an explicit offset reg. It should
1691            // always by reg0 since we're transforming LDRi12s.
1692            if (!isT2)
1693              MIB.addReg(0);
1694            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1695            ++NumLDRDFormed;
1696          } else {
1697            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1698              .addReg(EvenReg)
1699              .addReg(OddReg)
1700              .addReg(BaseReg);
1701            // FIXME: We're converting from LDRi12 to an insn that still
1702            // uses addrmode2, so we need an explicit offset reg. It should
1703            // always by reg0 since we're transforming STRi12s.
1704            if (!isT2)
1705              MIB.addReg(0);
1706            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1707            ++NumSTRDFormed;
1708          }
1709          MBB->erase(Op0);
1710          MBB->erase(Op1);
1711
1712          // Add register allocation hints to form register pairs.
1713          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1714          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1715        } else {
1716          for (unsigned i = 0; i != NumMove; ++i) {
1717            MachineInstr *Op = Ops.back();
1718            Ops.pop_back();
1719            MBB->splice(InsertPos, MBB, Op);
1720          }
1721        }
1722
1723        NumLdStMoved += NumMove;
1724        RetVal = true;
1725      }
1726    }
1727  }
1728
1729  return RetVal;
1730}
1731
1732bool
1733ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1734  bool RetVal = false;
1735
1736  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1737  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1738  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1739  SmallVector<unsigned, 4> LdBases;
1740  SmallVector<unsigned, 4> StBases;
1741
1742  unsigned Loc = 0;
1743  MachineBasicBlock::iterator MBBI = MBB->begin();
1744  MachineBasicBlock::iterator E = MBB->end();
1745  while (MBBI != E) {
1746    for (; MBBI != E; ++MBBI) {
1747      MachineInstr *MI = MBBI;
1748      const MCInstrDesc &MCID = MI->getDesc();
1749      if (MCID.isCall() || MCID.isTerminator()) {
1750        // Stop at barriers.
1751        ++MBBI;
1752        break;
1753      }
1754
1755      if (!MI->isDebugValue())
1756        MI2LocMap[MI] = ++Loc;
1757
1758      if (!isMemoryOp(MI))
1759        continue;
1760      unsigned PredReg = 0;
1761      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
1762        continue;
1763
1764      int Opc = MI->getOpcode();
1765      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1766      unsigned Base = MI->getOperand(1).getReg();
1767      int Offset = getMemoryOpOffset(MI);
1768
1769      bool StopHere = false;
1770      if (isLd) {
1771        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1772          Base2LdsMap.find(Base);
1773        if (BI != Base2LdsMap.end()) {
1774          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1775            if (Offset == getMemoryOpOffset(BI->second[i])) {
1776              StopHere = true;
1777              break;
1778            }
1779          }
1780          if (!StopHere)
1781            BI->second.push_back(MI);
1782        } else {
1783          SmallVector<MachineInstr*, 4> MIs;
1784          MIs.push_back(MI);
1785          Base2LdsMap[Base] = MIs;
1786          LdBases.push_back(Base);
1787        }
1788      } else {
1789        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1790          Base2StsMap.find(Base);
1791        if (BI != Base2StsMap.end()) {
1792          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1793            if (Offset == getMemoryOpOffset(BI->second[i])) {
1794              StopHere = true;
1795              break;
1796            }
1797          }
1798          if (!StopHere)
1799            BI->second.push_back(MI);
1800        } else {
1801          SmallVector<MachineInstr*, 4> MIs;
1802          MIs.push_back(MI);
1803          Base2StsMap[Base] = MIs;
1804          StBases.push_back(Base);
1805        }
1806      }
1807
1808      if (StopHere) {
1809        // Found a duplicate (a base+offset combination that's seen earlier).
1810        // Backtrack.
1811        --Loc;
1812        break;
1813      }
1814    }
1815
1816    // Re-schedule loads.
1817    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1818      unsigned Base = LdBases[i];
1819      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1820      if (Lds.size() > 1)
1821        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1822    }
1823
1824    // Re-schedule stores.
1825    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1826      unsigned Base = StBases[i];
1827      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1828      if (Sts.size() > 1)
1829        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1830    }
1831
1832    if (MBBI != E) {
1833      Base2LdsMap.clear();
1834      Base2StsMap.clear();
1835      LdBases.clear();
1836      StBases.clear();
1837    }
1838  }
1839
1840  return RetVal;
1841}
1842
1843
1844/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1845/// optimization pass.
1846FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1847  if (PreAlloc)
1848    return new ARMPreAllocLoadStoreOpt();
1849  return new ARMLoadStoreOpt();
1850}
1851