1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMBaseRegisterInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallPtrSet.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineBasicBlock.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/CodeGen/RegisterScavenging.h"
33#include "llvm/CodeGen/SelectionDAGNodes.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/Function.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/ErrorHandling.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Target/TargetInstrInfo.h"
41#include "llvm/Target/TargetMachine.h"
42#include "llvm/Target/TargetRegisterInfo.h"
43using namespace llvm;
44
45STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46STATISTIC(NumSTMGened , "Number of stm instructions generated");
47STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
53STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
54STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
55STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
59
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state; presumably initialized in
    // runOnMachineFunction (its body is outside this view — confirm there).
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;    // Scavenger used to find scratch registers.
    bool isThumb2;       // True when emitting Thumb2 (affects opcode choice).

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store tracked for merging into
    /// a load / store multiple instruction.
    struct MemOpQueueEntry {
      int Offset;                        // Immediate offset from the base reg.
      unsigned Reg;                      // Register loaded / stored.
      bool isKill;                       // True if Reg is killed by this op.
      unsigned Position;                 // Ordinal position within the block.
      MachineBasicBlock::iterator MBBI;  // The load / store instruction.
      bool Merged;                       // Set once folded into a multiple op.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Create and insert a single LDM/STM covering Regs; see definition below.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl,
                  ArrayRef<std::pair<unsigned, bool> > Regs,
                  ArrayRef<unsigned> ImpDefs);
    // Call MergeOps on memOps[memOpsBegin, memOpsEnd) and update bookkeeping
    // (kill flags, Merges list) on success.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    // Greedily merge a run of queued memops starting at SIndex.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVectorImpl<MachineBasicBlock::iterator> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
136
/// getLoadStoreMultipleOpcode - Map a single load / store opcode plus an
/// addressing submode to the corresponding load / store multiple opcode.
/// Returns 0 where no non-writeback form exists (VFP db variants only exist
/// with base update).  Also bumps the matching "generated" statistic for the
/// opcode class.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 only supports the ia and db submodes.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
204
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode (ia, ib, da,
/// db) encoded in a load / store multiple opcode, including the _UPD
/// (writeback) and _RET (return) variants.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm
261
262static bool isT2i32Load(unsigned Opc) {
263  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
264}
265
266static bool isi32Load(unsigned Opc) {
267  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
268}
269
270static bool isT2i32Store(unsigned Opc) {
271  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
272}
273
274static bool isi32Store(unsigned Opc) {
275  return Opc == ARM::STRi12 || isT2i32Store(Opc);
276}
277
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.  Offset is the starting
/// offset (relative to Base) of the first register in Regs; it determines
/// which addressing submode (ia/ib/da/db) is used, or forces materialization
/// of a new base register when no submode matches.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          ArrayRef<std::pair<unsigned, bool> > Regs,
                          ArrayRef<unsigned> ImpDefs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Choose the submode implied by the starting offset:
  //   +4          -> ib,  -4*n+4 -> da,  -4*n -> db,  0 -> ia.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    // Materialize NewBase = Base +/- Offset with an ADD or SUB, predicated
    // the same way as the memory ops being merged.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as a shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right at its use.
  }

  // Loads define their register operands; stores use them.
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  // Add implicit defs for super-registers.
  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);

  return true;
}
362
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.  On success the merged instruction is appended to Merges, the
// original single loads / stores in [memOpsBegin, memOpsEnd) are erased, and
// kill flags are moved so they remain correct at the insertion point.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.  Scan the memops OUTSIDE the range being merged that come
  // before the insertion point; any kill they hold must transfer to the
  // merged instruction, which becomes the last use.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      // Skip over [memOpsBegin, memOpsEnd) itself.
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 8> ImpDefs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));

    // Collect any implicit defs of super-registers. They must be preserved.
    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
        continue;
      unsigned DefReg = MO->getReg();
      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
        ImpDefs.push_back(DefReg);
    }
  }

  // Try to do the merge.  The merged instruction goes immediately after the
  // memop at insertAfter.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
442
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.  Scans MemOps starting at SIndex for a
/// maximal run with consecutive offsets (and, for VFP, consecutive registers),
/// merges it via MergeOpsUpdate, then recurses on the remainder.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                         unsigned Base, int Opcode, unsigned Size,
                         ARMCC::CondCodes Pred, unsigned PredReg,
                         unsigned Scratch, MemOpQueue &MemOps,
                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;        // Starting offset of the run being built.
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // Encoding value of the previous register in the run; undef regs get
  // UINT_MAX so nothing can follow them consecutively.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      // Extend the current run.
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      // Recurse to handle the rest of the queue starting at i.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // Track the latest position so the merged op is inserted after every
    // memop it replaces.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole tail merged; the base is killed if the first op killed it.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
514
515static bool definesCPSR(MachineInstr *MI) {
516  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
517    const MachineOperand &MO = MI->getOperand(i);
518    if (!MO.isReg())
519      continue;
520    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
521      // If the instruction has live CPSR def, then it's not safe to fold it
522      // into load / store.
523      return true;
524  }
525
526  return false;
527}
528
529static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
530                                unsigned Bytes, unsigned Limit,
531                                ARMCC::CondCodes Pred, unsigned PredReg) {
532  unsigned MyPredReg = 0;
533  if (!MI)
534    return false;
535
536  bool CheckCPSRDef = false;
537  switch (MI->getOpcode()) {
538  default: return false;
539  case ARM::t2SUBri:
540  case ARM::SUBri:
541    CheckCPSRDef = true;
542  // fallthrough
543  case ARM::tSUBspi:
544    break;
545  }
546
547  // Make sure the offset fits in 8 bits.
548  if (Bytes == 0 || (Limit && Bytes >= Limit))
549    return false;
550
551  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
552  if (!(MI->getOperand(0).getReg() == Base &&
553        MI->getOperand(1).getReg() == Base &&
554        (MI->getOperand(2).getImm()*Scale) == Bytes &&
555        getInstrPredicate(MI, MyPredReg) == Pred &&
556        MyPredReg == PredReg))
557    return false;
558
559  return CheckCPSRDef ? !definesCPSR(MI) : true;
560}
561
562static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
563                                unsigned Bytes, unsigned Limit,
564                                ARMCC::CondCodes Pred, unsigned PredReg) {
565  unsigned MyPredReg = 0;
566  if (!MI)
567    return false;
568
569  bool CheckCPSRDef = false;
570  switch (MI->getOpcode()) {
571  default: return false;
572  case ARM::t2ADDri:
573  case ARM::ADDri:
574    CheckCPSRDef = true;
575  // fallthrough
576  case ARM::tADDspi:
577    break;
578  }
579
580  if (Bytes == 0 || (Limit && Bytes >= Limit))
581    // Make sure the offset fits in 8 bits.
582    return false;
583
584  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
585  if (!(MI->getOperand(0).getReg() == Base &&
586        MI->getOperand(1).getReg() == Base &&
587        (MI->getOperand(2).getImm()*Scale) == Bytes &&
588        getInstrPredicate(MI, MyPredReg) == Pred &&
589        MyPredReg == PredReg))
590    return false;
591
592  return CheckCPSRDef ? !definesCPSR(MI) : true;
593}
594
595static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
596  switch (MI->getOpcode()) {
597  default: return 0;
598  case ARM::LDRi12:
599  case ARM::STRi12:
600  case ARM::t2LDRi8:
601  case ARM::t2LDRi12:
602  case ARM::t2STRi8:
603  case ARM::t2STRi12:
604  case ARM::VLDRS:
605  case ARM::VSTRS:
606    return 4;
607  case ARM::VLDRD:
608  case ARM::VSTRD:
609    return 8;
610  case ARM::LDMIA:
611  case ARM::LDMDA:
612  case ARM::LDMDB:
613  case ARM::LDMIB:
614  case ARM::STMIA:
615  case ARM::STMDA:
616  case ARM::STMDB:
617  case ARM::STMIB:
618  case ARM::t2LDMIA:
619  case ARM::t2LDMDB:
620  case ARM::t2STMIA:
621  case ARM::t2STMDB:
622  case ARM::VLDMSIA:
623  case ARM::VSTMSIA:
624    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
625  case ARM::VLDMDIA:
626  case ARM::VSTMDIA:
627    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
628  }
629}
630
/// getUpdatingLSMultipleOpcode - Map a load / store multiple opcode plus an
/// addressing submode to the corresponding base-updating (writeback, _UPD)
/// opcode.  Thumb2 and VFP variants only support the ia and db submodes.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
697
/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// Advance is set (and I bumped) when the erased add/sub was the
/// instruction the caller's iterator I pointed at.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.  A preceding decrement folds
  // by switching to the "before" form of the submode (ia -> db, ib -> da).
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip over debug values so they don't block the fold.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.  A trailing add folds into the
  // increment submodes, a trailing sub into the decrement submodes, without
  // changing the submode.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we're erasing the caller's scan position, tell it to advance.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild the instruction as the writeback (_UPD) form with the base
  // register added as a def, then erase the original.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
792
793static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
794                                             ARM_AM::AddrOpc Mode) {
795  switch (Opc) {
796  case ARM::LDRi12:
797    return ARM::LDR_PRE_IMM;
798  case ARM::STRi12:
799    return ARM::STR_PRE_IMM;
800  case ARM::VLDRS:
801    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
802  case ARM::VLDRD:
803    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
804  case ARM::VSTRS:
805    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
806  case ARM::VSTRD:
807    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
808  case ARM::t2LDRi8:
809  case ARM::t2LDRi12:
810    return ARM::t2LDR_PRE;
811  case ARM::t2STRi8:
812  case ARM::t2STRi12:
813    return ARM::t2STR_PRE;
814  default: llvm_unreachable("Unhandled opcode!");
815  }
816}
817
818static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
819                                              ARM_AM::AddrOpc Mode) {
820  switch (Opc) {
821  case ARM::LDRi12:
822    return ARM::LDR_POST_IMM;
823  case ARM::STRi12:
824    return ARM::STR_POST_IMM;
825  case ARM::VLDRS:
826    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
827  case ARM::VLDRD:
828    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
829  case ARM::VSTRS:
830    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
831  case ARM::VSTRD:
832    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
833  case ARM::t2LDRi8:
834  case ARM::t2LDRi12:
835    return ARM::t2LDR_POST;
836  case ARM::t2STRi8:
837  case ARM::t2STRi12:
838    return ARM::t2STR_POST;
839  default: llvm_unreachable("Unhandled opcode!");
840  }
841}
842
/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// e.g. "ldr r0, [r1]; add r1, r1, #4" becomes a post-indexed
/// "ldr r0, [r1], #4". On success the matched add/sub is erased and MI is
/// replaced by a pre/post-indexed form. If the erased instruction is the
/// instruction the caller's iterator I points at, Advance is set and I is
/// moved past it so the caller's scan stays valid.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  // Addressing-mode classification: AM5 = VFP single/double load/store,
  // AM2 = ARM-mode LDR/STR with 12-bit immediate; the remaining opcodes
  // handled here are the Thumb2 i8/i12 forms.
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // The fold is only valid when the memory op currently uses a zero offset;
  // otherwise absorbing the inc/dec would change the accessed address.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // Immediate range of the indexed forms: AM2 - 12 bits, thumb2 - 8 bits.
  // AM5 passes 0 since it may only absorb an update of exactly Bytes.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction: a matching add/sub of Base
  // just before the memory op becomes a pre-indexed access.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Otherwise try merging with the next instruction, which yields a
  // post-indexed access.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If we are about to erase the instruction the caller's iterator points
      // at, step the iterator forward first so it remains valid.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        // The _PRE_IMM forms take a plain signed byte offset.
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // Other AM2 forms take an encoded AM2 opcode plus a (vestigial)
        // zero offset register.
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  // The original load/store is now replaced by the indexed form built above.
  MBB.erase(MBBI);

  return true;
}
980
981/// isMemoryOp - Returns true if instruction is a memory operation that this
982/// pass is capable of operating on.
983static bool isMemoryOp(const MachineInstr *MI) {
984  // When no memory operands are present, conservatively assume unaligned,
985  // volatile, unfoldable.
986  if (!MI->hasOneMemOperand())
987    return false;
988
989  const MachineMemOperand *MMO = *MI->memoperands_begin();
990
991  // Don't touch volatile memory accesses - we may be changing their order.
992  if (MMO->isVolatile())
993    return false;
994
995  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
996  // not.
997  if (MMO->getAlignment() < 4)
998    return false;
999
1000  // str <undef> could probably be eliminated entirely, but for now we just want
1001  // to avoid making a mess of it.
1002  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1003  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1004      MI->getOperand(0).isUndef())
1005    return false;
1006
1007  // Likewise don't mess with references to undefined addresses.
1008  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1009      MI->getOperand(1).isUndef())
1010    return false;
1011
1012  int Opcode = MI->getOpcode();
1013  switch (Opcode) {
1014  default: break;
1015  case ARM::VLDRS:
1016  case ARM::VSTRS:
1017    return MI->getOperand(1).isReg();
1018  case ARM::VLDRD:
1019  case ARM::VSTRD:
1020    return MI->getOperand(1).isReg();
1021  case ARM::LDRi12:
1022  case ARM::STRi12:
1023  case ARM::t2LDRi8:
1024  case ARM::t2LDRi12:
1025  case ARM::t2STRi8:
1026  case ARM::t2STRi12:
1027    return MI->getOperand(1).isReg();
1028  }
1029  return false;
1030}
1031
1032/// AdvanceRS - Advance register scavenger to just before the earliest memory
1033/// op that is being merged.
1034void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1035  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1036  unsigned Position = MemOps[0].Position;
1037  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1038    if (MemOps[i].Position < Position) {
1039      Position = MemOps[i].Position;
1040      Loc = MemOps[i].MBBI;
1041    }
1042  }
1043
1044  if (Loc != MBB.begin())
1045    RS->forward(prior(Loc));
1046}
1047
1048static int getMemoryOpOffset(const MachineInstr *MI) {
1049  int Opcode = MI->getOpcode();
1050  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1051  unsigned NumOperands = MI->getDesc().getNumOperands();
1052  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1053
1054  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1055      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1056      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1057      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1058    return OffField;
1059
1060  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1061    : ARM_AM::getAM5Offset(OffField) * 4;
1062  if (isAM3) {
1063    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1064      Offset = -Offset;
1065  } else {
1066    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1067      Offset = -Offset;
1068  }
1069  return Offset;
1070}
1071
1072static void InsertLDR_STR(MachineBasicBlock &MBB,
1073                          MachineBasicBlock::iterator &MBBI,
1074                          int Offset, bool isDef,
1075                          DebugLoc dl, unsigned NewOpc,
1076                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1077                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1078                          bool OffKill, bool OffUndef,
1079                          ARMCC::CondCodes Pred, unsigned PredReg,
1080                          const TargetInstrInfo *TII, bool isT2) {
1081  if (isDef) {
1082    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1083                                      TII->get(NewOpc))
1084      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1085      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1086    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1087  } else {
1088    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1089                                      TII->get(NewOpc))
1090      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1091      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1092    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1093  }
1094}
1095
/// FixInvalidRegPairOp - LDRD/STRD want an even/odd physical register pair.
/// If this instruction's pair is not a valid ascending even/odd pair (or it
/// trips the Cortex-M3 LDRD errata), rewrite it either as a single LDM/STM
/// (when registers ascend and the offset is zero) or as two word-sized
/// loads/stores. Returns true if a rewrite happened; MBBI is left pointing
/// at the first replacement instruction so the caller rescans from there.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A proper even/odd ascending pair (and no errata) needs no fixing.
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the pair registers are defs (dead flag); for stores they are
    // uses (kill flag).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // The ARM-mode form carries an extra offset-register operand; t2 doesn't.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
      // so adjust and use t2LDRi12 here for that.
      unsigned NewOpc2 = (isLd)
        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd (higher-addressed) half first so the even half, which
        // overlaps the base, is loaded last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Never kill the base register in the first instruction.
        if (EvenReg == BaseReg)
          EvenDeadKill = false;
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original paired op and restart scanning at the replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
1215
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
/// Scans the block once, collecting chains of compatible memory ops into
/// MemOps; whenever a chain is broken (incompatible op, base clobber, or end
/// of block) it tries to merge the queued ops into multiple-register forms
/// and to fold adjacent base inc/dec instructions into them.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State describing the chain currently being accumulated: all queued ops
  // share this base register, opcode, size and predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Invalid LDRD/STRD pairs are rewritten first; the replacement may be a
    // candidate itself, so rescan from the rewritten position.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());

      // Watch out for:
      // r4 := ldr [r0, #8]
      // r4 := ldr [r0, #4]
      //
      // The optimization may reorder the second ldr in front of the first
      // ldr, which violates write after write(WAW) dependence. The same as
      // str. Try to merge inst(s) already in MemOps.
      bool Overlap = false;
      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
          Overlap = true;
          break;
        }
      }

      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else if (!Overlap) {
        if (Clobber) {
          // This op redefines the base: include it in the merge attempt but
          // end the chain here.
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue, keeping MemOps sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values don't break a chain; just step over them.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state so the next memory op starts a fresh chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
1394
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  // NOTE(review): getLastNonDebugInstr() can return end() for a non-empty
  // block containing only debug values; the begin() check below would not
  // catch that case — confirm such blocks cannot reach here.
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list ends with the last operand; the merge only applies
      // when that final restored register is LR.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // Only the IA_UPD forms have a _RET counterpart here.
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite lr -> pc in place and drop the now-redundant return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1433
1434bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1435  const TargetMachine &TM = Fn.getTarget();
1436  AFI = Fn.getInfo<ARMFunctionInfo>();
1437  TII = TM.getInstrInfo();
1438  TRI = TM.getRegisterInfo();
1439  STI = &TM.getSubtarget<ARMSubtarget>();
1440  RS = new RegScavenger();
1441  isThumb2 = AFI->isThumb2Function();
1442
1443  bool Modified = false;
1444  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1445       ++MFI) {
1446    MachineBasicBlock &MBB = *MFI;
1447    Modified |= LoadStoreMultipleOpti(MBB);
1448    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1449      Modified |= MergeReturnIntoLDM(MBB);
1450  }
1451
1452  delete RS;
1453  return Modified;
1454}
1455
1456
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it
/// more likely they will be combined later.
1460
namespace {
  // Pass object for the pre-RA load/store rescheduling optimization.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state; set up at the start of
    // runOnMachineFunction.
    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Check whether Op0/Op1 can be combined into a single LDRD/STRD and, if
    // so, compute the new opcode, register pair, base, and encoded offset.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Move the loads/stores in Ops (all sharing Base) next to each other so
    // the post-RA pass can merge them; MI2LocMap gives each op's position.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // Per-block driver for the rescheduling.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
1494
1495bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1496  TD  = Fn.getTarget().getDataLayout();
1497  TII = Fn.getTarget().getInstrInfo();
1498  TRI = Fn.getTarget().getRegisterInfo();
1499  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1500  MRI = &Fn.getRegInfo();
1501  MF  = &Fn;
1502
1503  bool Modified = false;
1504  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1505       ++MFI)
1506    Modified |= RescheduleLoadStoreInstrs(MFI);
1507
1508  return Modified;
1509}
1510
1511static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1512                                      MachineBasicBlock::iterator I,
1513                                      MachineBasicBlock::iterator E,
1514                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1515                                      SmallSet<unsigned, 4> &MemRegs,
1516                                      const TargetRegisterInfo *TRI) {
1517  // Are there stores / loads / calls between them?
1518  // FIXME: This is overly conservative. We should make use of alias information
1519  // some day.
1520  SmallSet<unsigned, 4> AddedRegPressure;
1521  while (++I != E) {
1522    if (I->isDebugValue() || MemOps.count(&*I))
1523      continue;
1524    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1525      return false;
1526    if (isLd && I->mayStore())
1527      return false;
1528    if (!isLd) {
1529      if (I->mayLoad())
1530        return false;
1531      // It's not safe to move the first 'str' down.
1532      // str r1, [r0]
1533      // strh r5, [r0]
1534      // str r4, [r0, #+4]
1535      if (I->mayStore())
1536        return false;
1537    }
1538    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1539      MachineOperand &MO = I->getOperand(j);
1540      if (!MO.isReg())
1541        continue;
1542      unsigned Reg = MO.getReg();
1543      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1544        return false;
1545      if (Reg != Base && !MemRegs.count(Reg))
1546        AddedRegPressure.insert(Reg);
1547    }
1548  }
1549
1550  // Estimate register pressure increase due to the transformation.
1551  if (MemRegs.size() <= 4)
1552    // Ok if we are moving small number of instructions.
1553    return true;
1554  return AddedRegPressure.size() <= MemRegs.size() * 2;
1555}
1556
1557
1558/// Copy Op0 and Op1 operands into a new array assigned to MI.
1559static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1560                                   MachineInstr *Op1) {
1561  assert(MI->memoperands_empty() && "expected a new machineinstr");
1562  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1563    + (Op1->memoperands_end() - Op1->memoperands_begin());
1564
1565  MachineFunction *MF = MI->getParent()->getParent();
1566  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1567  MachineSDNode::mmo_iterator MemEnd =
1568    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1569  MemEnd =
1570    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1571  MI->setMemRefs(MemBegin, MemEnd);
1572}
1573
/// CanFormLdStDWord - Check whether the pair of single-word memory ops
/// Op0/Op1 can be combined into one LDRD/STRD. On success fills in the new
/// opcode, the even/odd destination registers, the base register, the
/// (possibly AM3-encoded) offset, the predicate, and whether the result is
/// a Thumb2 instruction.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Scale: granularity of the t2 offset immediate (words for t2, bytes for
  // ARM mode).
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a signed, scaled 8-bit immediate directly.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD/STRD use an addrmode3 encoding: magnitude plus an
    // explicit add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct transfer registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
1646
1647namespace {
1648  struct OffsetCompare {
1649    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1650      int LOffset = getMemoryOpOffset(LHS);
1651      int ROffset = getMemoryOpOffset(RHS);
1652      assert(LHS == RHS || LOffset != ROffset);
1653      return LOffset > ROffset;
1654    }
1655  };
1656}
1657
/// RescheduleOps - Given a set of loads or stores (Ops) that all access memory
/// off the same base register, try to move runs of them next to each other so
/// a later pass can merge them into multi-register transfers. Runs of exactly
/// two may be merged directly into an LDRD / STRD here. MI2LocMap gives each
/// instruction's ordinal position within the current block segment. Consumed
/// entries are popped off Ops; returns true if anything was moved or merged.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order). After this, Ops.back() is the
  // instruction with the lowest memory offset.
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;      // Earliest program location in the run.
    unsigned LastLoc = 0;         // Latest program location in the run.
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk from the back of Ops (lowest offset) toward the front, growing a
    // run of same-opcode, same-size accesses at strictly consecutive offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      // '<=' so the first iteration initializes FirstLoc/FirstOp from ~0U.
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // All members of the run must map to the same load/store-multiple
      // opcode, otherwise they can never be combined.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Reject gaps: each access must be exactly LastBytes past the previous
      // one and all accesses must have the same transfer size.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      // A run of one cannot be combined; drop it and retry with the rest.
      Ops.pop_back();
    else {
      // Collect the run's instructions and their transferred registers for
      // the safety / profitability query below.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Discard the whole run; none of it can be rescheduled.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores. Loads cluster at
        // the earliest member, stores at the latest; skip past run members
        // and debug values so the insertion point is just after the run.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        // Op0 is the lower-offset access (Ops is sorted in reverse order).
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          // Constrain both transfer registers to the class LDRD/STRD accepts.
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The originals are fully replaced by the new pair instruction.
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // Can't form a pair: just move each run member to the cluster
          // point, preserving their relative (offset-sorted) order.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
1810
/// RescheduleLoadStoreInstrs - Scan a basic block, grouping candidate load
/// and store instructions by their base register, then invoke RescheduleOps
/// on each group to move same-base memory accesses next to each other. The
/// block is processed in segments delimited by calls and terminators, which
/// act as scheduling barriers. Returns true if any change was made.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Maps each (non-debug) instruction to its ordinal position in the current
  // segment; RescheduleOps uses this to measure how far apart accesses are.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  // Base registers in first-seen order, so rescheduling is deterministic.
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    // Collect one segment's worth of loads and stores.
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Debug values are transparent: they get no location and don't
      // perturb the distance metric.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      // Only unconditional accesses are candidates.
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access at an already-seen base+offset would alias the
          // first; end the segment here rather than mix them.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // Same duplicate base+offset check as the load path above.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        // Note: MBBI is deliberately NOT advanced, so this instruction is
        // reprocessed as the first of the next segment; --Loc undoes the
        // location it was assigned above.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start the next segment with fresh groupings.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
1916
1917
1918/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1919/// optimization pass.
1920FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1921  if (PreAlloc)
1922    return new ARMPreAllocLoadStoreOpt();
1923  return new ARMLoadStoreOpt();
1924}
1925