1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMBaseRegisterInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallPtrSet.h"
24#include "llvm/ADT/SmallSet.h"
25#include "llvm/ADT/SmallVector.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/CodeGen/MachineBasicBlock.h"
28#include "llvm/CodeGen/MachineFunctionPass.h"
29#include "llvm/CodeGen/MachineInstr.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/CodeGen/RegisterScavenging.h"
33#include "llvm/CodeGen/SelectionDAGNodes.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/Function.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/ErrorHandling.h"
39#include "llvm/Support/raw_ostream.h"
40#include "llvm/Target/TargetInstrInfo.h"
41#include "llvm/Target/TargetMachine.h"
42#include "llvm/Target/TargetRegisterInfo.h"
43using namespace llvm;
44
45STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46STATISTIC(NumSTMGened , "Number of stm instructions generated");
47STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
53STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
54STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
55STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56
57/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
58/// load / store instructions to form ldm / stm instructions.
59
60namespace {
61  struct ARMLoadStoreOpt : public MachineFunctionPass {
62    static char ID;
63    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
64
65    const TargetInstrInfo *TII;
66    const TargetRegisterInfo *TRI;
67    const ARMSubtarget *STI;
68    ARMFunctionInfo *AFI;
69    RegScavenger *RS;
70    bool isThumb2;
71
72    virtual bool runOnMachineFunction(MachineFunction &Fn);
73
74    virtual const char *getPassName() const {
75      return "ARM load / store optimization pass";
76    }
77
78  private:
79    struct MemOpQueueEntry {
80      int Offset;
81      unsigned Reg;
82      bool isKill;
83      unsigned Position;
84      MachineBasicBlock::iterator MBBI;
85      bool Merged;
86      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
87                      MachineBasicBlock::iterator i)
88        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
89    };
90    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
91    typedef MemOpQueue::iterator MemOpQueueIter;
92
93    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
94                  int Offset, unsigned Base, bool BaseKill, int Opcode,
95                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
96                  DebugLoc dl,
97                  ArrayRef<std::pair<unsigned, bool> > Regs,
98                  ArrayRef<unsigned> ImpDefs);
99    void MergeOpsUpdate(MachineBasicBlock &MBB,
100                        MemOpQueue &MemOps,
101                        unsigned memOpsBegin,
102                        unsigned memOpsEnd,
103                        unsigned insertAfter,
104                        int Offset,
105                        unsigned Base,
106                        bool BaseKill,
107                        int Opcode,
108                        ARMCC::CondCodes Pred,
109                        unsigned PredReg,
110                        unsigned Scratch,
111                        DebugLoc dl,
112                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
114                      int Opcode, unsigned Size,
115                      ARMCC::CondCodes Pred, unsigned PredReg,
116                      unsigned Scratch, MemOpQueue &MemOps,
117                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
118
119    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
120    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
121                             MachineBasicBlock::iterator &MBBI);
122    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
123                                  MachineBasicBlock::iterator MBBI,
124                                  const TargetInstrInfo *TII,
125                                  bool &Advance,
126                                  MachineBasicBlock::iterator &I);
127    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
128                                   MachineBasicBlock::iterator MBBI,
129                                   bool &Advance,
130                                   MachineBasicBlock::iterator &I);
131    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
132    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
133  };
134  char ARMLoadStoreOpt::ID = 0;
135}
136
137static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
138  switch (Opcode) {
139  default: llvm_unreachable("Unhandled opcode!");
140  case ARM::LDRi12:
141    ++NumLDMGened;
142    switch (Mode) {
143    default: llvm_unreachable("Unhandled submode!");
144    case ARM_AM::ia: return ARM::LDMIA;
145    case ARM_AM::da: return ARM::LDMDA;
146    case ARM_AM::db: return ARM::LDMDB;
147    case ARM_AM::ib: return ARM::LDMIB;
148    }
149  case ARM::STRi12:
150    ++NumSTMGened;
151    switch (Mode) {
152    default: llvm_unreachable("Unhandled submode!");
153    case ARM_AM::ia: return ARM::STMIA;
154    case ARM_AM::da: return ARM::STMDA;
155    case ARM_AM::db: return ARM::STMDB;
156    case ARM_AM::ib: return ARM::STMIB;
157    }
158  case ARM::t2LDRi8:
159  case ARM::t2LDRi12:
160    ++NumLDMGened;
161    switch (Mode) {
162    default: llvm_unreachable("Unhandled submode!");
163    case ARM_AM::ia: return ARM::t2LDMIA;
164    case ARM_AM::db: return ARM::t2LDMDB;
165    }
166  case ARM::t2STRi8:
167  case ARM::t2STRi12:
168    ++NumSTMGened;
169    switch (Mode) {
170    default: llvm_unreachable("Unhandled submode!");
171    case ARM_AM::ia: return ARM::t2STMIA;
172    case ARM_AM::db: return ARM::t2STMDB;
173    }
174  case ARM::VLDRS:
175    ++NumVLDMGened;
176    switch (Mode) {
177    default: llvm_unreachable("Unhandled submode!");
178    case ARM_AM::ia: return ARM::VLDMSIA;
179    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
180    }
181  case ARM::VSTRS:
182    ++NumVSTMGened;
183    switch (Mode) {
184    default: llvm_unreachable("Unhandled submode!");
185    case ARM_AM::ia: return ARM::VSTMSIA;
186    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187    }
188  case ARM::VLDRD:
189    ++NumVLDMGened;
190    switch (Mode) {
191    default: llvm_unreachable("Unhandled submode!");
192    case ARM_AM::ia: return ARM::VLDMDIA;
193    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
194    }
195  case ARM::VSTRD:
196    ++NumVSTMGened;
197    switch (Mode) {
198    default: llvm_unreachable("Unhandled submode!");
199    case ARM_AM::ia: return ARM::VSTMDIA;
200    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
201    }
202  }
203}
204
205namespace llvm {
206  namespace ARM_AM {
207
208AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
209  switch (Opcode) {
210  default: llvm_unreachable("Unhandled opcode!");
211  case ARM::LDMIA_RET:
212  case ARM::LDMIA:
213  case ARM::LDMIA_UPD:
214  case ARM::STMIA:
215  case ARM::STMIA_UPD:
216  case ARM::t2LDMIA_RET:
217  case ARM::t2LDMIA:
218  case ARM::t2LDMIA_UPD:
219  case ARM::t2STMIA:
220  case ARM::t2STMIA_UPD:
221  case ARM::VLDMSIA:
222  case ARM::VLDMSIA_UPD:
223  case ARM::VSTMSIA:
224  case ARM::VSTMSIA_UPD:
225  case ARM::VLDMDIA:
226  case ARM::VLDMDIA_UPD:
227  case ARM::VSTMDIA:
228  case ARM::VSTMDIA_UPD:
229    return ARM_AM::ia;
230
231  case ARM::LDMDA:
232  case ARM::LDMDA_UPD:
233  case ARM::STMDA:
234  case ARM::STMDA_UPD:
235    return ARM_AM::da;
236
237  case ARM::LDMDB:
238  case ARM::LDMDB_UPD:
239  case ARM::STMDB:
240  case ARM::STMDB_UPD:
241  case ARM::t2LDMDB:
242  case ARM::t2LDMDB_UPD:
243  case ARM::t2STMDB:
244  case ARM::t2STMDB_UPD:
245  case ARM::VLDMSDB_UPD:
246  case ARM::VSTMSDB_UPD:
247  case ARM::VLDMDDB_UPD:
248  case ARM::VSTMDDB_UPD:
249    return ARM_AM::db;
250
251  case ARM::LDMIB:
252  case ARM::LDMIB_UPD:
253  case ARM::STMIB:
254  case ARM::STMIB_UPD:
255    return ARM_AM::ib;
256  }
257}
258
259  } // end namespace ARM_AM
260} // end namespace llvm
261
262static bool isT2i32Load(unsigned Opc) {
263  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
264}
265
266static bool isi32Load(unsigned Opc) {
267  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
268}
269
270static bool isT2i32Store(unsigned Opc) {
271  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
272}
273
274static bool isi32Store(unsigned Opc) {
275  return Opc == ARM::STRi12 || isT2i32Store(Opc);
276}
277
278/// MergeOps - Create and insert a LDM or STM with Base as base register and
279/// registers in Regs as the register operands that would be loaded / stored.
280/// It returns true if the transformation is done.
281bool
282ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
283                          MachineBasicBlock::iterator MBBI,
284                          int Offset, unsigned Base, bool BaseKill,
285                          int Opcode, ARMCC::CondCodes Pred,
286                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
287                          ArrayRef<std::pair<unsigned, bool> > Regs,
288                          ArrayRef<unsigned> ImpDefs) {
289  // Only a single register to load / store. Don't bother.
290  unsigned NumRegs = Regs.size();
291  if (NumRegs <= 1)
292    return false;
293
294  ARM_AM::AMSubMode Mode = ARM_AM::ia;
295  // VFP and Thumb2 do not support IB or DA modes.
296  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
297  bool haveIBAndDA = isNotVFP && !isThumb2;
298  if (Offset == 4 && haveIBAndDA)
299    Mode = ARM_AM::ib;
300  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
301    Mode = ARM_AM::da;
302  else if (Offset == -4 * (int)NumRegs && isNotVFP)
303    // VLDM/VSTM do not support DB mode without also updating the base reg.
304    Mode = ARM_AM::db;
305  else if (Offset != 0) {
306    // Check if this is a supported opcode before we insert instructions to
307    // calculate a new base register.
308    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
309
310    // If starting offset isn't zero, insert a MI to materialize a new base.
311    // But only do so if it is cost effective, i.e. merging more than two
312    // loads / stores.
313    if (NumRegs <= 2)
314      return false;
315
316    unsigned NewBase;
317    if (isi32Load(Opcode))
318      // If it is a load, then just use one of the destination register to
319      // use as the new base.
320      NewBase = Regs[NumRegs-1].first;
321    else {
322      // Use the scratch register to use as a new base.
323      NewBase = Scratch;
324      if (NewBase == 0)
325        return false;
326    }
327    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
328    if (Offset < 0) {
329      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
330      Offset = - Offset;
331    }
332    int ImmedOffset = isThumb2
333      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
334    if (ImmedOffset == -1)
335      // FIXME: Try t2ADDri12 or t2SUBri12?
336      return false;  // Probably not worth it then.
337
338    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
339      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
340      .addImm(Pred).addReg(PredReg).addReg(0);
341    Base = NewBase;
342    BaseKill = true;  // New base is always killed right its use.
343  }
344
345  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
346                Opcode == ARM::VLDRD);
347  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
348  if (!Opcode) return false;
349  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
350    .addReg(Base, getKillRegState(BaseKill))
351    .addImm(Pred).addReg(PredReg);
352  for (unsigned i = 0; i != NumRegs; ++i)
353    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
354                     | getKillRegState(Regs[i].second));
355
356  // Add implicit defs for super-registers.
357  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
358    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
359
360  return true;
361}
362
363// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
364// success.
365void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
366                                     MemOpQueue &memOps,
367                                     unsigned memOpsBegin, unsigned memOpsEnd,
368                                     unsigned insertAfter, int Offset,
369                                     unsigned Base, bool BaseKill,
370                                     int Opcode,
371                                     ARMCC::CondCodes Pred, unsigned PredReg,
372                                     unsigned Scratch,
373                                     DebugLoc dl,
374                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
375  // First calculate which of the registers should be killed by the merged
376  // instruction.
377  const unsigned insertPos = memOps[insertAfter].Position;
378  SmallSet<unsigned, 4> KilledRegs;
379  DenseMap<unsigned, unsigned> Killer;
380  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
381    if (i == memOpsBegin) {
382      i = memOpsEnd;
383      if (i == e)
384        break;
385    }
386    if (memOps[i].Position < insertPos && memOps[i].isKill) {
387      unsigned Reg = memOps[i].Reg;
388      KilledRegs.insert(Reg);
389      Killer[Reg] = i;
390    }
391  }
392
393  SmallVector<std::pair<unsigned, bool>, 8> Regs;
394  SmallVector<unsigned, 8> ImpDefs;
395  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
396    unsigned Reg = memOps[i].Reg;
397    // If we are inserting the merged operation after an operation that
398    // uses the same register, make sure to transfer any kill flag.
399    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
400    Regs.push_back(std::make_pair(Reg, isKill));
401
402    // Collect any implicit defs of super-registers. They must be preserved.
403    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
404      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
405        continue;
406      unsigned DefReg = MO->getReg();
407      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
408        ImpDefs.push_back(DefReg);
409    }
410  }
411
412  // Try to do the merge.
413  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
414  ++Loc;
415  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
416                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
417    return;
418
419  // Merge succeeded, update records.
420  Merges.push_back(prior(Loc));
421  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
422    // Remove kill flags from any memops that come before insertPos.
423    if (Regs[i-memOpsBegin].second) {
424      unsigned Reg = Regs[i-memOpsBegin].first;
425      if (KilledRegs.count(Reg)) {
426        unsigned j = Killer[Reg];
427        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
428        assert(Idx >= 0 && "Cannot find killing operand");
429        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
430        memOps[j].isKill = false;
431      }
432      memOps[i].isKill = true;
433    }
434    MBB.erase(memOps[i].MBBI);
435    // Update this memop to refer to the merged instruction.
436    // We may need to move kill flags again.
437    memOps[i].Merged = true;
438    memOps[i].MBBI = Merges.back();
439    memOps[i].Position = insertPos;
440  }
441}
442
443/// MergeLDR_STR - Merge a number of load / store instructions into one or more
444/// load / store multiple instructions.
445void
446ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
447                          unsigned Base, int Opcode, unsigned Size,
448                          ARMCC::CondCodes Pred, unsigned PredReg,
449                          unsigned Scratch, MemOpQueue &MemOps,
450                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
451  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
452  int Offset = MemOps[SIndex].Offset;
453  int SOffset = Offset;
454  unsigned insertAfter = SIndex;
455  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
456  DebugLoc dl = Loc->getDebugLoc();
457  const MachineOperand &PMO = Loc->getOperand(0);
458  unsigned PReg = PMO.getReg();
459  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
460  unsigned Count = 1;
461  unsigned Limit = ~0U;
462
463  // vldm / vstm limit are 32 for S variants, 16 for D variants.
464
465  switch (Opcode) {
466  default: break;
467  case ARM::VSTRS:
468    Limit = 32;
469    break;
470  case ARM::VSTRD:
471    Limit = 16;
472    break;
473  case ARM::VLDRD:
474    Limit = 16;
475    break;
476  case ARM::VLDRS:
477    Limit = 32;
478    break;
479  }
480
481  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
482    int NewOffset = MemOps[i].Offset;
483    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
484    unsigned Reg = MO.getReg();
485    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
486    // Register numbers must be in ascending order. For VFP / NEON load and
487    // store multiples, the registers must also be consecutive and within the
488    // limit on the number of registers per instruction.
489    if (Reg != ARM::SP &&
490        NewOffset == Offset + (int)Size &&
491        ((isNotVFP && RegNum > PRegNum) ||
492         ((Count < Limit) && RegNum == PRegNum+1))) {
493      Offset += Size;
494      PRegNum = RegNum;
495      ++Count;
496    } else {
497      // Can't merge this in. Try merge the earlier ones first.
498      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
499                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
500      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
501                   MemOps, Merges);
502      return;
503    }
504
505    if (MemOps[i].Position > MemOps[insertAfter].Position)
506      insertAfter = i;
507  }
508
509  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
510  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
511                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
512  return;
513}
514
515static bool definesCPSR(MachineInstr *MI) {
516  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
517    const MachineOperand &MO = MI->getOperand(i);
518    if (!MO.isReg())
519      continue;
520    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
521      // If the instruction has live CPSR def, then it's not safe to fold it
522      // into load / store.
523      return true;
524  }
525
526  return false;
527}
528
529static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
530                                unsigned Bytes, unsigned Limit,
531                                ARMCC::CondCodes Pred, unsigned PredReg) {
532  unsigned MyPredReg = 0;
533  if (!MI)
534    return false;
535
536  bool CheckCPSRDef = false;
537  switch (MI->getOpcode()) {
538  default: return false;
539  case ARM::t2SUBri:
540  case ARM::SUBri:
541    CheckCPSRDef = true;
542  // fallthrough
543  case ARM::tSUBspi:
544    break;
545  }
546
547  // Make sure the offset fits in 8 bits.
548  if (Bytes == 0 || (Limit && Bytes >= Limit))
549    return false;
550
551  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
552  if (!(MI->getOperand(0).getReg() == Base &&
553        MI->getOperand(1).getReg() == Base &&
554        (MI->getOperand(2).getImm()*Scale) == Bytes &&
555        getInstrPredicate(MI, MyPredReg) == Pred &&
556        MyPredReg == PredReg))
557    return false;
558
559  return CheckCPSRDef ? !definesCPSR(MI) : true;
560}
561
562static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
563                                unsigned Bytes, unsigned Limit,
564                                ARMCC::CondCodes Pred, unsigned PredReg) {
565  unsigned MyPredReg = 0;
566  if (!MI)
567    return false;
568
569  bool CheckCPSRDef = false;
570  switch (MI->getOpcode()) {
571  default: return false;
572  case ARM::t2ADDri:
573  case ARM::ADDri:
574    CheckCPSRDef = true;
575  // fallthrough
576  case ARM::tADDspi:
577    break;
578  }
579
580  if (Bytes == 0 || (Limit && Bytes >= Limit))
581    // Make sure the offset fits in 8 bits.
582    return false;
583
584  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
585  if (!(MI->getOperand(0).getReg() == Base &&
586        MI->getOperand(1).getReg() == Base &&
587        (MI->getOperand(2).getImm()*Scale) == Bytes &&
588        getInstrPredicate(MI, MyPredReg) == Pred &&
589        MyPredReg == PredReg))
590    return false;
591
592  return CheckCPSRDef ? !definesCPSR(MI) : true;
593}
594
595static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
596  switch (MI->getOpcode()) {
597  default: return 0;
598  case ARM::LDRi12:
599  case ARM::STRi12:
600  case ARM::t2LDRi8:
601  case ARM::t2LDRi12:
602  case ARM::t2STRi8:
603  case ARM::t2STRi12:
604  case ARM::VLDRS:
605  case ARM::VSTRS:
606    return 4;
607  case ARM::VLDRD:
608  case ARM::VSTRD:
609    return 8;
610  case ARM::LDMIA:
611  case ARM::LDMDA:
612  case ARM::LDMDB:
613  case ARM::LDMIB:
614  case ARM::STMIA:
615  case ARM::STMDA:
616  case ARM::STMDB:
617  case ARM::STMIB:
618  case ARM::t2LDMIA:
619  case ARM::t2LDMDB:
620  case ARM::t2STMIA:
621  case ARM::t2STMDB:
622  case ARM::VLDMSIA:
623  case ARM::VSTMSIA:
624    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
625  case ARM::VLDMDIA:
626  case ARM::VSTMDIA:
627    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
628  }
629}
630
631static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
632                                            ARM_AM::AMSubMode Mode) {
633  switch (Opc) {
634  default: llvm_unreachable("Unhandled opcode!");
635  case ARM::LDMIA:
636  case ARM::LDMDA:
637  case ARM::LDMDB:
638  case ARM::LDMIB:
639    switch (Mode) {
640    default: llvm_unreachable("Unhandled submode!");
641    case ARM_AM::ia: return ARM::LDMIA_UPD;
642    case ARM_AM::ib: return ARM::LDMIB_UPD;
643    case ARM_AM::da: return ARM::LDMDA_UPD;
644    case ARM_AM::db: return ARM::LDMDB_UPD;
645    }
646  case ARM::STMIA:
647  case ARM::STMDA:
648  case ARM::STMDB:
649  case ARM::STMIB:
650    switch (Mode) {
651    default: llvm_unreachable("Unhandled submode!");
652    case ARM_AM::ia: return ARM::STMIA_UPD;
653    case ARM_AM::ib: return ARM::STMIB_UPD;
654    case ARM_AM::da: return ARM::STMDA_UPD;
655    case ARM_AM::db: return ARM::STMDB_UPD;
656    }
657  case ARM::t2LDMIA:
658  case ARM::t2LDMDB:
659    switch (Mode) {
660    default: llvm_unreachable("Unhandled submode!");
661    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
662    case ARM_AM::db: return ARM::t2LDMDB_UPD;
663    }
664  case ARM::t2STMIA:
665  case ARM::t2STMDB:
666    switch (Mode) {
667    default: llvm_unreachable("Unhandled submode!");
668    case ARM_AM::ia: return ARM::t2STMIA_UPD;
669    case ARM_AM::db: return ARM::t2STMDB_UPD;
670    }
671  case ARM::VLDMSIA:
672    switch (Mode) {
673    default: llvm_unreachable("Unhandled submode!");
674    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
675    case ARM_AM::db: return ARM::VLDMSDB_UPD;
676    }
677  case ARM::VLDMDIA:
678    switch (Mode) {
679    default: llvm_unreachable("Unhandled submode!");
680    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
681    case ARM_AM::db: return ARM::VLDMDDB_UPD;
682    }
683  case ARM::VSTMSIA:
684    switch (Mode) {
685    default: llvm_unreachable("Unhandled submode!");
686    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
687    case ARM_AM::db: return ARM::VSTMSDB_UPD;
688    }
689  case ARM::VSTMDIA:
690    switch (Mode) {
691    default: llvm_unreachable("Unhandled submode!");
692    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
693    case ARM_AM::db: return ARM::VSTMDDB_UPD;
694    }
695  }
696}
697
698/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
699/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
700///
701/// stmia rn, <ra, rb, rc>
702/// rn := rn + 4 * 3;
703/// =>
704/// stmia rn!, <ra, rb, rc>
705///
706/// rn := rn - 4 * 3;
707/// ldmia rn, <ra, rb, rc>
708/// =>
709/// ldmdb rn!, <ra, rb, rc>
710bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
711                                               MachineBasicBlock::iterator MBBI,
712                                               bool &Advance,
713                                               MachineBasicBlock::iterator &I) {
714  MachineInstr *MI = MBBI;
715  unsigned Base = MI->getOperand(0).getReg();
716  bool BaseKill = MI->getOperand(0).isKill();
717  unsigned Bytes = getLSMultipleTransferSize(MI);
718  unsigned PredReg = 0;
719  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
720  int Opcode = MI->getOpcode();
721  DebugLoc dl = MI->getDebugLoc();
722
723  // Can't use an updating ld/st if the base register is also a dest
724  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
725  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
726    if (MI->getOperand(i).getReg() == Base)
727      return false;
728
729  bool DoMerge = false;
730  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
731
732  // Try merging with the previous instruction.
733  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
734  if (MBBI != BeginMBBI) {
735    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
736    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
737      --PrevMBBI;
738    if (Mode == ARM_AM::ia &&
739        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
740      Mode = ARM_AM::db;
741      DoMerge = true;
742    } else if (Mode == ARM_AM::ib &&
743               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
744      Mode = ARM_AM::da;
745      DoMerge = true;
746    }
747    if (DoMerge)
748      MBB.erase(PrevMBBI);
749  }
750
751  // Try merging with the next instruction.
752  MachineBasicBlock::iterator EndMBBI = MBB.end();
753  if (!DoMerge && MBBI != EndMBBI) {
754    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
755    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
756      ++NextMBBI;
757    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
758        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
759      DoMerge = true;
760    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
761               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
762      DoMerge = true;
763    }
764    if (DoMerge) {
765      if (NextMBBI == I) {
766        Advance = true;
767        ++I;
768      }
769      MBB.erase(NextMBBI);
770    }
771  }
772
773  if (!DoMerge)
774    return false;
775
776  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
777  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
778    .addReg(Base, getDefRegState(true)) // WB base register
779    .addReg(Base, getKillRegState(BaseKill))
780    .addImm(Pred).addReg(PredReg);
781
782  // Transfer the rest of operands.
783  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
784    MIB.addOperand(MI->getOperand(OpNum));
785
786  // Transfer memoperands.
787  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
788
789  MBB.erase(MBBI);
790  return true;
791}
792
793static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
794                                             ARM_AM::AddrOpc Mode) {
795  switch (Opc) {
796  case ARM::LDRi12:
797    return ARM::LDR_PRE_IMM;
798  case ARM::STRi12:
799    return ARM::STR_PRE_IMM;
800  case ARM::VLDRS:
801    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
802  case ARM::VLDRD:
803    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
804  case ARM::VSTRS:
805    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
806  case ARM::VSTRD:
807    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
808  case ARM::t2LDRi8:
809  case ARM::t2LDRi12:
810    return ARM::t2LDR_PRE;
811  case ARM::t2STRi8:
812  case ARM::t2STRi12:
813    return ARM::t2STR_PRE;
814  default: llvm_unreachable("Unhandled opcode!");
815  }
816}
817
818static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
819                                              ARM_AM::AddrOpc Mode) {
820  switch (Opc) {
821  case ARM::LDRi12:
822    return ARM::LDR_POST_IMM;
823  case ARM::STRi12:
824    return ARM::STR_POST_IMM;
825  case ARM::VLDRS:
826    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
827  case ARM::VLDRD:
828    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
829  case ARM::VSTRS:
830    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
831  case ARM::VSTRD:
832    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
833  case ARM::t2LDRi8:
834  case ARM::t2LDRi12:
835    return ARM::t2LDR_POST;
836  case ARM::t2STRi8:
837  case ARM::t2STRi12:
838    return ARM::t2STR_POST;
839  default: llvm_unreachable("Unhandled opcode!");
840  }
841}
842
843/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
844/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
845bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
846                                               MachineBasicBlock::iterator MBBI,
847                                               const TargetInstrInfo *TII,
848                                               bool &Advance,
849                                               MachineBasicBlock::iterator &I) {
850  MachineInstr *MI = MBBI;
851  unsigned Base = MI->getOperand(1).getReg();
852  bool BaseKill = MI->getOperand(1).isKill();
853  unsigned Bytes = getLSMultipleTransferSize(MI);
854  int Opcode = MI->getOpcode();
855  DebugLoc dl = MI->getDebugLoc();
856  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
857                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
858  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
859  if (isi32Load(Opcode) || isi32Store(Opcode))
860    if (MI->getOperand(2).getImm() != 0)
861      return false;
862  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
863    return false;
864
865  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
866  // Can't do the merge if the destination register is the same as the would-be
867  // writeback register.
868  if (isLd && MI->getOperand(0).getReg() == Base)
869    return false;
870
871  unsigned PredReg = 0;
872  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
873  bool DoMerge = false;
874  ARM_AM::AddrOpc AddSub = ARM_AM::add;
875  unsigned NewOpc = 0;
876  // AM2 - 12 bits, thumb2 - 8 bits.
877  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
878
879  // Try merging with the previous instruction.
880  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
881  if (MBBI != BeginMBBI) {
882    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
883    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
884      --PrevMBBI;
885    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
886      DoMerge = true;
887      AddSub = ARM_AM::sub;
888    } else if (!isAM5 &&
889               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
890      DoMerge = true;
891    }
892    if (DoMerge) {
893      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
894      MBB.erase(PrevMBBI);
895    }
896  }
897
898  // Try merging with the next instruction.
899  MachineBasicBlock::iterator EndMBBI = MBB.end();
900  if (!DoMerge && MBBI != EndMBBI) {
901    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
902    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
903      ++NextMBBI;
904    if (!isAM5 &&
905        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
906      DoMerge = true;
907      AddSub = ARM_AM::sub;
908    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
909      DoMerge = true;
910    }
911    if (DoMerge) {
912      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
913      if (NextMBBI == I) {
914        Advance = true;
915        ++I;
916      }
917      MBB.erase(NextMBBI);
918    }
919  }
920
921  if (!DoMerge)
922    return false;
923
924  if (isAM5) {
925    // VLDM[SD}_UPD, VSTM[SD]_UPD
926    // (There are no base-updating versions of VLDR/VSTR instructions, but the
927    // updating load/store-multiple instructions can be used with only one
928    // register.)
929    MachineOperand &MO = MI->getOperand(0);
930    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
931      .addReg(Base, getDefRegState(true)) // WB base register
932      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
933      .addImm(Pred).addReg(PredReg)
934      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
935                            getKillRegState(MO.isKill())));
936  } else if (isLd) {
937    if (isAM2) {
938      // LDR_PRE, LDR_POST
939      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
940        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
941        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
942          .addReg(Base, RegState::Define)
943          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
944      } else {
945        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
946        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
947          .addReg(Base, RegState::Define)
948          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
949      }
950    } else {
951      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
952      // t2LDR_PRE, t2LDR_POST
953      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
954        .addReg(Base, RegState::Define)
955        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
956    }
957  } else {
958    MachineOperand &MO = MI->getOperand(0);
959    // FIXME: post-indexed stores use am2offset_imm, which still encodes
960    // the vestigal zero-reg offset register. When that's fixed, this clause
961    // can be removed entirely.
962    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
963      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
964      // STR_PRE, STR_POST
965      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
966        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
967        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
968    } else {
969      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
970      // t2STR_PRE, t2STR_POST
971      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
972        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
973        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
974    }
975  }
976  MBB.erase(MBBI);
977
978  return true;
979}
980
981/// isMemoryOp - Returns true if instruction is a memory operation that this
982/// pass is capable of operating on.
983static bool isMemoryOp(const MachineInstr *MI) {
984  // When no memory operands are present, conservatively assume unaligned,
985  // volatile, unfoldable.
986  if (!MI->hasOneMemOperand())
987    return false;
988
989  const MachineMemOperand *MMO = *MI->memoperands_begin();
990
991  // Don't touch volatile memory accesses - we may be changing their order.
992  if (MMO->isVolatile())
993    return false;
994
995  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
996  // not.
997  if (MMO->getAlignment() < 4)
998    return false;
999
1000  // str <undef> could probably be eliminated entirely, but for now we just want
1001  // to avoid making a mess of it.
1002  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1003  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1004      MI->getOperand(0).isUndef())
1005    return false;
1006
1007  // Likewise don't mess with references to undefined addresses.
1008  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1009      MI->getOperand(1).isUndef())
1010    return false;
1011
1012  int Opcode = MI->getOpcode();
1013  switch (Opcode) {
1014  default: break;
1015  case ARM::VLDRS:
1016  case ARM::VSTRS:
1017    return MI->getOperand(1).isReg();
1018  case ARM::VLDRD:
1019  case ARM::VSTRD:
1020    return MI->getOperand(1).isReg();
1021  case ARM::LDRi12:
1022  case ARM::STRi12:
1023  case ARM::t2LDRi8:
1024  case ARM::t2LDRi12:
1025  case ARM::t2STRi8:
1026  case ARM::t2STRi12:
1027    return MI->getOperand(1).isReg();
1028  }
1029  return false;
1030}
1031
1032/// AdvanceRS - Advance register scavenger to just before the earliest memory
1033/// op that is being merged.
1034void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1035  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1036  unsigned Position = MemOps[0].Position;
1037  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1038    if (MemOps[i].Position < Position) {
1039      Position = MemOps[i].Position;
1040      Loc = MemOps[i].MBBI;
1041    }
1042  }
1043
1044  if (Loc != MBB.begin())
1045    RS->forward(prior(Loc));
1046}
1047
1048static int getMemoryOpOffset(const MachineInstr *MI) {
1049  int Opcode = MI->getOpcode();
1050  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1051  unsigned NumOperands = MI->getDesc().getNumOperands();
1052  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1053
1054  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1055      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1056      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1057      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1058    return OffField;
1059
1060  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1061    : ARM_AM::getAM5Offset(OffField) * 4;
1062  if (isAM3) {
1063    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1064      Offset = -Offset;
1065  } else {
1066    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1067      Offset = -Offset;
1068  }
1069  return Offset;
1070}
1071
1072static void InsertLDR_STR(MachineBasicBlock &MBB,
1073                          MachineBasicBlock::iterator &MBBI,
1074                          int Offset, bool isDef,
1075                          DebugLoc dl, unsigned NewOpc,
1076                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1077                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1078                          bool OffKill, bool OffUndef,
1079                          ARMCC::CondCodes Pred, unsigned PredReg,
1080                          const TargetInstrInfo *TII, bool isT2) {
1081  if (isDef) {
1082    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1083                                      TII->get(NewOpc))
1084      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1085      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1086    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1087  } else {
1088    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1089                                      TII->get(NewOpc))
1090      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1091      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1092    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1093  }
1094}
1095
1096bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1097                                          MachineBasicBlock::iterator &MBBI) {
1098  MachineInstr *MI = &*MBBI;
1099  unsigned Opcode = MI->getOpcode();
1100  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1101      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1102    const MachineOperand &BaseOp = MI->getOperand(2);
1103    unsigned BaseReg = BaseOp.getReg();
1104    unsigned EvenReg = MI->getOperand(0).getReg();
1105    unsigned OddReg  = MI->getOperand(1).getReg();
1106    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1107    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1108    // ARM errata 602117: LDRD with base in list may result in incorrect base
1109    // register when interrupted or faulted.
1110    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1111    if (!Errata602117 &&
1112        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1113      return false;
1114
1115    MachineBasicBlock::iterator NewBBI = MBBI;
1116    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1117    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1118    bool EvenDeadKill = isLd ?
1119      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1120    bool EvenUndef = MI->getOperand(0).isUndef();
1121    bool OddDeadKill  = isLd ?
1122      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1123    bool OddUndef = MI->getOperand(1).isUndef();
1124    bool BaseKill = BaseOp.isKill();
1125    bool BaseUndef = BaseOp.isUndef();
1126    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1127    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1128    int OffImm = getMemoryOpOffset(MI);
1129    unsigned PredReg = 0;
1130    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1131
1132    if (OddRegNum > EvenRegNum && OffImm == 0) {
1133      // Ascending register numbers and no offset. It's safe to change it to a
1134      // ldm or stm.
1135      unsigned NewOpc = (isLd)
1136        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1137        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1138      if (isLd) {
1139        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1140          .addReg(BaseReg, getKillRegState(BaseKill))
1141          .addImm(Pred).addReg(PredReg)
1142          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1143          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1144        ++NumLDRD2LDM;
1145      } else {
1146        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1147          .addReg(BaseReg, getKillRegState(BaseKill))
1148          .addImm(Pred).addReg(PredReg)
1149          .addReg(EvenReg,
1150                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1151          .addReg(OddReg,
1152                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1153        ++NumSTRD2STM;
1154      }
1155      NewBBI = llvm::prior(MBBI);
1156    } else {
1157      // Split into two instructions.
1158      unsigned NewOpc = (isLd)
1159        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1160        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1161      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1162      // so adjust and use t2LDRi12 here for that.
1163      unsigned NewOpc2 = (isLd)
1164        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1165        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1166      DebugLoc dl = MBBI->getDebugLoc();
1167      // If this is a load and base register is killed, it may have been
1168      // re-defed by the load, make sure the first load does not clobber it.
1169      if (isLd &&
1170          (BaseKill || OffKill) &&
1171          (TRI->regsOverlap(EvenReg, BaseReg))) {
1172        assert(!TRI->regsOverlap(OddReg, BaseReg));
1173        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1174                      OddReg, OddDeadKill, false,
1175                      BaseReg, false, BaseUndef, false, OffUndef,
1176                      Pred, PredReg, TII, isT2);
1177        NewBBI = llvm::prior(MBBI);
1178        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1179                      EvenReg, EvenDeadKill, false,
1180                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1181                      Pred, PredReg, TII, isT2);
1182      } else {
1183        if (OddReg == EvenReg && EvenDeadKill) {
1184          // If the two source operands are the same, the kill marker is
1185          // probably on the first one. e.g.
1186          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1187          EvenDeadKill = false;
1188          OddDeadKill = true;
1189        }
1190        // Never kill the base register in the first instruction.
1191        if (EvenReg == BaseReg)
1192          EvenDeadKill = false;
1193        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1194                      EvenReg, EvenDeadKill, EvenUndef,
1195                      BaseReg, false, BaseUndef, false, OffUndef,
1196                      Pred, PredReg, TII, isT2);
1197        NewBBI = llvm::prior(MBBI);
1198        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1199                      OddReg, OddDeadKill, OddUndef,
1200                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1201                      Pred, PredReg, TII, isT2);
1202      }
1203      if (isLd)
1204        ++NumLDRD2LDR;
1205      else
1206        ++NumSTRD2STR;
1207    }
1208
1209    MBB.erase(MI);
1210    MBBI = NewBBI;
1211    return true;
1212  }
1213  return false;
1214}
1215
1216/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1217/// ops of the same base and incrementing offset into LDM / STM ops.
1218bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1219  unsigned NumMerges = 0;
1220  unsigned NumMemOps = 0;
1221  MemOpQueue MemOps;
1222  unsigned CurrBase = 0;
1223  int CurrOpc = -1;
1224  unsigned CurrSize = 0;
1225  ARMCC::CondCodes CurrPred = ARMCC::AL;
1226  unsigned CurrPredReg = 0;
1227  unsigned Position = 0;
1228  SmallVector<MachineBasicBlock::iterator,4> Merges;
1229
1230  RS->enterBasicBlock(&MBB);
1231  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1232  while (MBBI != E) {
1233    if (FixInvalidRegPairOp(MBB, MBBI))
1234      continue;
1235
1236    bool Advance  = false;
1237    bool TryMerge = false;
1238    bool Clobber  = false;
1239
1240    bool isMemOp = isMemoryOp(MBBI);
1241    if (isMemOp) {
1242      int Opcode = MBBI->getOpcode();
1243      unsigned Size = getLSMultipleTransferSize(MBBI);
1244      const MachineOperand &MO = MBBI->getOperand(0);
1245      unsigned Reg = MO.getReg();
1246      bool isKill = MO.isDef() ? false : MO.isKill();
1247      unsigned Base = MBBI->getOperand(1).getReg();
1248      unsigned PredReg = 0;
1249      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1250      int Offset = getMemoryOpOffset(MBBI);
1251      // Watch out for:
1252      // r4 := ldr [r5]
1253      // r5 := ldr [r5, #4]
1254      // r6 := ldr [r5, #8]
1255      //
1256      // The second ldr has effectively broken the chain even though it
1257      // looks like the later ldr(s) use the same base register. Try to
1258      // merge the ldr's so far, including this one. But don't try to
1259      // combine the following ldr(s).
1260      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1261      if (CurrBase == 0 && !Clobber) {
1262        // Start of a new chain.
1263        CurrBase = Base;
1264        CurrOpc  = Opcode;
1265        CurrSize = Size;
1266        CurrPred = Pred;
1267        CurrPredReg = PredReg;
1268        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1269        ++NumMemOps;
1270        Advance = true;
1271      } else {
1272        if (Clobber) {
1273          TryMerge = true;
1274          Advance = true;
1275        }
1276
1277        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1278          // No need to match PredReg.
1279          // Continue adding to the queue.
1280          if (Offset > MemOps.back().Offset) {
1281            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1282                                             Position, MBBI));
1283            ++NumMemOps;
1284            Advance = true;
1285          } else {
1286            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1287                 I != E; ++I) {
1288              if (Offset < I->Offset) {
1289                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1290                                                 Position, MBBI));
1291                ++NumMemOps;
1292                Advance = true;
1293                break;
1294              } else if (Offset == I->Offset) {
1295                // Collision! This can't be merged!
1296                break;
1297              }
1298            }
1299          }
1300        }
1301      }
1302    }
1303
1304    if (MBBI->isDebugValue()) {
1305      ++MBBI;
1306      if (MBBI == E)
1307        // Reach the end of the block, try merging the memory instructions.
1308        TryMerge = true;
1309    } else if (Advance) {
1310      ++Position;
1311      ++MBBI;
1312      if (MBBI == E)
1313        // Reach the end of the block, try merging the memory instructions.
1314        TryMerge = true;
1315    } else
1316      TryMerge = true;
1317
1318    if (TryMerge) {
1319      if (NumMemOps > 1) {
1320        // Try to find a free register to use as a new base in case it's needed.
1321        // First advance to the instruction just before the start of the chain.
1322        AdvanceRS(MBB, MemOps);
1323        // Find a scratch register.
1324        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1325        // Process the load / store instructions.
1326        RS->forward(prior(MBBI));
1327
1328        // Merge ops.
1329        Merges.clear();
1330        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1331                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1332
1333        // Try folding preceding/trailing base inc/dec into the generated
1334        // LDM/STM ops.
1335        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1336          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1337            ++NumMerges;
1338        NumMerges += Merges.size();
1339
1340        // Try folding preceding/trailing base inc/dec into those load/store
1341        // that were not merged to form LDM/STM ops.
1342        for (unsigned i = 0; i != NumMemOps; ++i)
1343          if (!MemOps[i].Merged)
1344            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1345              ++NumMerges;
1346
1347        // RS may be pointing to an instruction that's deleted.
1348        RS->skipTo(prior(MBBI));
1349      } else if (NumMemOps == 1) {
1350        // Try folding preceding/trailing base inc/dec into the single
1351        // load/store.
1352        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1353          ++NumMerges;
1354          RS->forward(prior(MBBI));
1355        }
1356      }
1357
1358      CurrBase = 0;
1359      CurrOpc = -1;
1360      CurrSize = 0;
1361      CurrPred = ARMCC::AL;
1362      CurrPredReg = 0;
1363      if (NumMemOps) {
1364        MemOps.clear();
1365        NumMemOps = 0;
1366      }
1367
1368      // If iterator hasn't been advanced and this is not a memory op, skip it.
1369      // It can't start a new chain anyway.
1370      if (!Advance && !isMemOp && MBBI != E) {
1371        ++Position;
1372        ++MBBI;
1373      }
1374    }
1375  }
1376  return NumMerges > 0;
1377}
1378
1379/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1380/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1381/// directly restore the value of LR into pc.
1382///   ldmfd sp!, {..., lr}
1383///   bx lr
1384/// or
1385///   ldmfd sp!, {..., lr}
1386///   mov pc, lr
1387/// =>
1388///   ldmfd sp!, {..., pc}
1389bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1390  if (MBB.empty()) return false;
1391
1392  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1393  if (MBBI != MBB.begin() &&
1394      (MBBI->getOpcode() == ARM::BX_RET ||
1395       MBBI->getOpcode() == ARM::tBX_RET ||
1396       MBBI->getOpcode() == ARM::MOVPCLR)) {
1397    MachineInstr *PrevMI = prior(MBBI);
1398    unsigned Opcode = PrevMI->getOpcode();
1399    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1400        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1401        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1402      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1403      if (MO.getReg() != ARM::LR)
1404        return false;
1405      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1406      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1407              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1408      PrevMI->setDesc(TII->get(NewOpc));
1409      MO.setReg(ARM::PC);
1410      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
1411      MBB.erase(MBBI);
1412      return true;
1413    }
1414  }
1415  return false;
1416}
1417
1418bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1419  const TargetMachine &TM = Fn.getTarget();
1420  AFI = Fn.getInfo<ARMFunctionInfo>();
1421  TII = TM.getInstrInfo();
1422  TRI = TM.getRegisterInfo();
1423  STI = &TM.getSubtarget<ARMSubtarget>();
1424  RS = new RegScavenger();
1425  isThumb2 = AFI->isThumb2Function();
1426
1427  bool Modified = false;
1428  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1429       ++MFI) {
1430    MachineBasicBlock &MBB = *MFI;
1431    Modified |= LoadStoreMultipleOpti(MBB);
1432    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1433      Modified |= MergeReturnIntoLDM(MBB);
1434  }
1435
1436  delete RS;
1437  return Modified;
1438}
1439
1440
1441/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1442/// load / stores from consecutive locations close to make it more
1443/// likely they will be combined later.
1444
1445namespace {
1446  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1447    static char ID;
1448    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1449
1450    const DataLayout *TD;
1451    const TargetInstrInfo *TII;
1452    const TargetRegisterInfo *TRI;
1453    const ARMSubtarget *STI;
1454    MachineRegisterInfo *MRI;
1455    MachineFunction *MF;
1456
1457    virtual bool runOnMachineFunction(MachineFunction &Fn);
1458
1459    virtual const char *getPassName() const {
1460      return "ARM pre- register allocation load / store optimization pass";
1461    }
1462
1463  private:
1464    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1465                          unsigned &NewOpc, unsigned &EvenReg,
1466                          unsigned &OddReg, unsigned &BaseReg,
1467                          int &Offset,
1468                          unsigned &PredReg, ARMCC::CondCodes &Pred,
1469                          bool &isT2);
1470    bool RescheduleOps(MachineBasicBlock *MBB,
1471                       SmallVector<MachineInstr*, 4> &Ops,
1472                       unsigned Base, bool isLd,
1473                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1474    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1475  };
1476  char ARMPreAllocLoadStoreOpt::ID = 0;
1477}
1478
1479bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1480  TD  = Fn.getTarget().getDataLayout();
1481  TII = Fn.getTarget().getInstrInfo();
1482  TRI = Fn.getTarget().getRegisterInfo();
1483  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1484  MRI = &Fn.getRegInfo();
1485  MF  = &Fn;
1486
1487  bool Modified = false;
1488  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1489       ++MFI)
1490    Modified |= RescheduleLoadStoreInstrs(MFI);
1491
1492  return Modified;
1493}
1494
1495static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1496                                      MachineBasicBlock::iterator I,
1497                                      MachineBasicBlock::iterator E,
1498                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1499                                      SmallSet<unsigned, 4> &MemRegs,
1500                                      const TargetRegisterInfo *TRI) {
1501  // Are there stores / loads / calls between them?
1502  // FIXME: This is overly conservative. We should make use of alias information
1503  // some day.
1504  SmallSet<unsigned, 4> AddedRegPressure;
1505  while (++I != E) {
1506    if (I->isDebugValue() || MemOps.count(&*I))
1507      continue;
1508    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1509      return false;
1510    if (isLd && I->mayStore())
1511      return false;
1512    if (!isLd) {
1513      if (I->mayLoad())
1514        return false;
1515      // It's not safe to move the first 'str' down.
1516      // str r1, [r0]
1517      // strh r5, [r0]
1518      // str r4, [r0, #+4]
1519      if (I->mayStore())
1520        return false;
1521    }
1522    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1523      MachineOperand &MO = I->getOperand(j);
1524      if (!MO.isReg())
1525        continue;
1526      unsigned Reg = MO.getReg();
1527      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1528        return false;
1529      if (Reg != Base && !MemRegs.count(Reg))
1530        AddedRegPressure.insert(Reg);
1531    }
1532  }
1533
1534  // Estimate register pressure increase due to the transformation.
1535  if (MemRegs.size() <= 4)
1536    // Ok if we are moving small number of instructions.
1537    return true;
1538  return AddedRegPressure.size() <= MemRegs.size() * 2;
1539}
1540
1541
1542/// Copy Op0 and Op1 operands into a new array assigned to MI.
1543static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1544                                   MachineInstr *Op1) {
1545  assert(MI->memoperands_empty() && "expected a new machineinstr");
1546  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1547    + (Op1->memoperands_end() - Op1->memoperands_begin());
1548
1549  MachineFunction *MF = MI->getParent()->getParent();
1550  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1551  MachineSDNode::mmo_iterator MemEnd =
1552    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1553  MemEnd =
1554    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1555  MI->setMemRefs(MemBegin, MemEnd);
1556}
1557
1558bool
1559ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1560                                          DebugLoc &dl,
1561                                          unsigned &NewOpc, unsigned &EvenReg,
1562                                          unsigned &OddReg, unsigned &BaseReg,
1563                                          int &Offset, unsigned &PredReg,
1564                                          ARMCC::CondCodes &Pred,
1565                                          bool &isT2) {
1566  // Make sure we're allowed to generate LDRD/STRD.
1567  if (!STI->hasV5TEOps())
1568    return false;
1569
1570  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1571  unsigned Scale = 1;
1572  unsigned Opcode = Op0->getOpcode();
1573  if (Opcode == ARM::LDRi12)
1574    NewOpc = ARM::LDRD;
1575  else if (Opcode == ARM::STRi12)
1576    NewOpc = ARM::STRD;
1577  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1578    NewOpc = ARM::t2LDRDi8;
1579    Scale = 4;
1580    isT2 = true;
1581  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1582    NewOpc = ARM::t2STRDi8;
1583    Scale = 4;
1584    isT2 = true;
1585  } else
1586    return false;
1587
1588  // Make sure the base address satisfies i64 ld / st alignment requirement.
1589  if (!Op0->hasOneMemOperand() ||
1590      !(*Op0->memoperands_begin())->getValue() ||
1591      (*Op0->memoperands_begin())->isVolatile())
1592    return false;
1593
1594  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1595  const Function *Func = MF->getFunction();
1596  unsigned ReqAlign = STI->hasV6Ops()
1597    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1598    : 8;  // Pre-v6 need 8-byte align
1599  if (Align < ReqAlign)
1600    return false;
1601
1602  // Then make sure the immediate offset fits.
1603  int OffImm = getMemoryOpOffset(Op0);
1604  if (isT2) {
1605    int Limit = (1 << 8) * Scale;
1606    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1607      return false;
1608    Offset = OffImm;
1609  } else {
1610    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1611    if (OffImm < 0) {
1612      AddSub = ARM_AM::sub;
1613      OffImm = - OffImm;
1614    }
1615    int Limit = (1 << 8) * Scale;
1616    if (OffImm >= Limit || (OffImm & (Scale-1)))
1617      return false;
1618    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1619  }
1620  EvenReg = Op0->getOperand(0).getReg();
1621  OddReg  = Op1->getOperand(0).getReg();
1622  if (EvenReg == OddReg)
1623    return false;
1624  BaseReg = Op0->getOperand(1).getReg();
1625  Pred = getInstrPredicate(Op0, PredReg);
1626  dl = Op0->getDebugLoc();
1627  return true;
1628}
1629
1630namespace {
1631  struct OffsetCompare {
1632    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1633      int LOffset = getMemoryOpOffset(LHS);
1634      int ROffset = getMemoryOpOffset(RHS);
1635      assert(LHS == RHS || LOffset != ROffset);
1636      return LOffset > ROffset;
1637    }
1638  };
1639}
1640
1641bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1642                                 SmallVector<MachineInstr*, 4> &Ops,
1643                                 unsigned Base, bool isLd,
1644                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1645  bool RetVal = false;
1646
1647  // Sort by offset (in reverse order).
1648  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1649
1650  // The loads / stores of the same base are in order. Scan them from first to
1651  // last and check for the following:
1652  // 1. Any def of base.
1653  // 2. Any gaps.
1654  while (Ops.size() > 1) {
1655    unsigned FirstLoc = ~0U;
1656    unsigned LastLoc = 0;
1657    MachineInstr *FirstOp = 0;
1658    MachineInstr *LastOp = 0;
1659    int LastOffset = 0;
1660    unsigned LastOpcode = 0;
1661    unsigned LastBytes = 0;
1662    unsigned NumMove = 0;
1663    for (int i = Ops.size() - 1; i >= 0; --i) {
1664      MachineInstr *Op = Ops[i];
1665      unsigned Loc = MI2LocMap[Op];
1666      if (Loc <= FirstLoc) {
1667        FirstLoc = Loc;
1668        FirstOp = Op;
1669      }
1670      if (Loc >= LastLoc) {
1671        LastLoc = Loc;
1672        LastOp = Op;
1673      }
1674
1675      unsigned LSMOpcode
1676        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1677      if (LastOpcode && LSMOpcode != LastOpcode)
1678        break;
1679
1680      int Offset = getMemoryOpOffset(Op);
1681      unsigned Bytes = getLSMultipleTransferSize(Op);
1682      if (LastBytes) {
1683        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1684          break;
1685      }
1686      LastOffset = Offset;
1687      LastBytes = Bytes;
1688      LastOpcode = LSMOpcode;
1689      if (++NumMove == 8) // FIXME: Tune this limit.
1690        break;
1691    }
1692
1693    if (NumMove <= 1)
1694      Ops.pop_back();
1695    else {
1696      SmallPtrSet<MachineInstr*, 4> MemOps;
1697      SmallSet<unsigned, 4> MemRegs;
1698      for (int i = NumMove-1; i >= 0; --i) {
1699        MemOps.insert(Ops[i]);
1700        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1701      }
1702
1703      // Be conservative, if the instructions are too far apart, don't
1704      // move them. We want to limit the increase of register pressure.
1705      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1706      if (DoMove)
1707        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1708                                           MemOps, MemRegs, TRI);
1709      if (!DoMove) {
1710        for (unsigned i = 0; i != NumMove; ++i)
1711          Ops.pop_back();
1712      } else {
1713        // This is the new location for the loads / stores.
1714        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1715        while (InsertPos != MBB->end()
1716               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1717          ++InsertPos;
1718
1719        // If we are moving a pair of loads / stores, see if it makes sense
1720        // to try to allocate a pair of registers that can form register pairs.
1721        MachineInstr *Op0 = Ops.back();
1722        MachineInstr *Op1 = Ops[Ops.size()-2];
1723        unsigned EvenReg = 0, OddReg = 0;
1724        unsigned BaseReg = 0, PredReg = 0;
1725        ARMCC::CondCodes Pred = ARMCC::AL;
1726        bool isT2 = false;
1727        unsigned NewOpc = 0;
1728        int Offset = 0;
1729        DebugLoc dl;
1730        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1731                                             EvenReg, OddReg, BaseReg,
1732                                             Offset, PredReg, Pred, isT2)) {
1733          Ops.pop_back();
1734          Ops.pop_back();
1735
1736          const MCInstrDesc &MCID = TII->get(NewOpc);
1737          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1738          MRI->constrainRegClass(EvenReg, TRC);
1739          MRI->constrainRegClass(OddReg, TRC);
1740
1741          // Form the pair instruction.
1742          if (isLd) {
1743            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1744              .addReg(EvenReg, RegState::Define)
1745              .addReg(OddReg, RegState::Define)
1746              .addReg(BaseReg);
1747            // FIXME: We're converting from LDRi12 to an insn that still
1748            // uses addrmode2, so we need an explicit offset reg. It should
1749            // always by reg0 since we're transforming LDRi12s.
1750            if (!isT2)
1751              MIB.addReg(0);
1752            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1753            concatenateMemOperands(MIB, Op0, Op1);
1754            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1755            ++NumLDRDFormed;
1756          } else {
1757            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1758              .addReg(EvenReg)
1759              .addReg(OddReg)
1760              .addReg(BaseReg);
1761            // FIXME: We're converting from LDRi12 to an insn that still
1762            // uses addrmode2, so we need an explicit offset reg. It should
1763            // always by reg0 since we're transforming STRi12s.
1764            if (!isT2)
1765              MIB.addReg(0);
1766            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1767            concatenateMemOperands(MIB, Op0, Op1);
1768            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1769            ++NumSTRDFormed;
1770          }
1771          MBB->erase(Op0);
1772          MBB->erase(Op1);
1773
1774          // Add register allocation hints to form register pairs.
1775          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1776          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1777        } else {
1778          for (unsigned i = 0; i != NumMove; ++i) {
1779            MachineInstr *Op = Ops.back();
1780            Ops.pop_back();
1781            MBB->splice(InsertPos, MBB, Op);
1782          }
1783        }
1784
1785        NumLdStMoved += NumMove;
1786        RetVal = true;
1787      }
1788    }
1789  }
1790
1791  return RetVal;
1792}
1793
1794bool
1795ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1796  bool RetVal = false;
1797
1798  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1799  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1800  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1801  SmallVector<unsigned, 4> LdBases;
1802  SmallVector<unsigned, 4> StBases;
1803
1804  unsigned Loc = 0;
1805  MachineBasicBlock::iterator MBBI = MBB->begin();
1806  MachineBasicBlock::iterator E = MBB->end();
1807  while (MBBI != E) {
1808    for (; MBBI != E; ++MBBI) {
1809      MachineInstr *MI = MBBI;
1810      if (MI->isCall() || MI->isTerminator()) {
1811        // Stop at barriers.
1812        ++MBBI;
1813        break;
1814      }
1815
1816      if (!MI->isDebugValue())
1817        MI2LocMap[MI] = ++Loc;
1818
1819      if (!isMemoryOp(MI))
1820        continue;
1821      unsigned PredReg = 0;
1822      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1823        continue;
1824
1825      int Opc = MI->getOpcode();
1826      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1827      unsigned Base = MI->getOperand(1).getReg();
1828      int Offset = getMemoryOpOffset(MI);
1829
1830      bool StopHere = false;
1831      if (isLd) {
1832        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1833          Base2LdsMap.find(Base);
1834        if (BI != Base2LdsMap.end()) {
1835          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1836            if (Offset == getMemoryOpOffset(BI->second[i])) {
1837              StopHere = true;
1838              break;
1839            }
1840          }
1841          if (!StopHere)
1842            BI->second.push_back(MI);
1843        } else {
1844          SmallVector<MachineInstr*, 4> MIs;
1845          MIs.push_back(MI);
1846          Base2LdsMap[Base] = MIs;
1847          LdBases.push_back(Base);
1848        }
1849      } else {
1850        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1851          Base2StsMap.find(Base);
1852        if (BI != Base2StsMap.end()) {
1853          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1854            if (Offset == getMemoryOpOffset(BI->second[i])) {
1855              StopHere = true;
1856              break;
1857            }
1858          }
1859          if (!StopHere)
1860            BI->second.push_back(MI);
1861        } else {
1862          SmallVector<MachineInstr*, 4> MIs;
1863          MIs.push_back(MI);
1864          Base2StsMap[Base] = MIs;
1865          StBases.push_back(Base);
1866        }
1867      }
1868
1869      if (StopHere) {
1870        // Found a duplicate (a base+offset combination that's seen earlier).
1871        // Backtrack.
1872        --Loc;
1873        break;
1874      }
1875    }
1876
1877    // Re-schedule loads.
1878    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1879      unsigned Base = LdBases[i];
1880      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1881      if (Lds.size() > 1)
1882        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1883    }
1884
1885    // Re-schedule stores.
1886    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1887      unsigned Base = StBases[i];
1888      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1889      if (Sts.size() > 1)
1890        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1891    }
1892
1893    if (MBBI != E) {
1894      Base2LdsMap.clear();
1895      Base2StsMap.clear();
1896      LdBases.clear();
1897      StBases.clear();
1898    }
1899  }
1900
1901  return RetVal;
1902}
1903
1904
1905/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1906/// optimization pass.
1907FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1908  if (PreAlloc)
1909    return new ARMPreAllocLoadStoreOpt();
1910  return new ARMLoadStoreOpt();
1911}
1912