1dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//
336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//                     The LLVM Compiler Infrastructure
436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//
536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// This file is distributed under the University of Illinois Open Source
636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// License. See LICENSE.TXT for details.
736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//
836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//===----------------------------------------------------------------------===//
936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//
1036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// This file contains a pass that performs load / store related peephole
1136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// optimizations. This pass should be run after register allocation.
1236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//
1336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//===----------------------------------------------------------------------===//
1436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
15dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "AArch64InstrInfo.h"
1637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64Subtarget.h"
17dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "MCTargetDesc/AArch64AddressingModes.h"
1836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/ADT/BitVector.h"
1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h"
2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineBasicBlock.h"
2136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineFunctionPass.h"
2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineInstr.h"
2336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineInstrBuilder.h"
2436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/CommandLine.h"
2536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/Debug.h"
2636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/ErrorHandling.h"
2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/raw_ostream.h"
2837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetInstrInfo.h"
2937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetMachine.h"
3037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetRegisterInfo.h"
3136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesusing namespace llvm;
3236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "aarch64-ldst-opt"
34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
/// AArch64LoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.
3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPostFolded, "Number of post-index updates folded");
4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPreFolded, "Number of pre-index updates folded");
4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumUnscaledPairCreated,
4236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          "Number of load/store from unscaled generated");
4336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
// Hidden command-line option "aarch64-load-store-scan-limit"; defaults to 20.
// NOTE(review): presumably this is the value handed to the `Limit` parameter
// of the find* scanning helpers -- confirm against their call sites.
44c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
45c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                                   cl::init(20), cl::Hidden);
4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Place holder while testing unscaled load/store combining
// Hidden command-line option "aarch64-unscaled-mem-op"; defaults to true.
// mergePairedInsns consults it together with isUnscaledLdst(): when both hold,
// the offset stride is the access size in bytes and the immediate of the new
// pair instruction is the original byte offset divided by that stride.
48c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic cl::opt<bool> EnableAArch64UnscaledMemOp(
49c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    "aarch64-unscaled-mem-op", cl::Hidden,
50c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesnamespace {
53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstruct AArch64LoadStoreOpt : public MachineFunctionPass {
5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  static char ID;
55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
57dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const AArch64InstrInfo *TII;
5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const TargetRegisterInfo *TRI;
5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Scan the instructions looking for a load/store that can be combined
6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // with the current instruction into a load/store pair.
6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Return the matching instruction if one is found, else MBB->end().
63c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // If a matching instruction is found, MergeForward is set to true if the
6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // merge is to remove the first instruction and replace the second with
6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // a pair-wise insn, and false if the reverse is true.
664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // \p SExtIdx[out] gives the index of the result of the load pair that
674c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // must be extended. The value of SExtIdx assumes that the paired load
684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // produces the value in this order: (I, returned iterator), i.e.,
694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // -1 means no value has to be extended, 0 means I, and 1 means the
704c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // returned iterator.
7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
724c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                                               bool &MergeForward, int &SExtIdx,
7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                               unsigned Limit);
7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Merge the two instructions indicated into a single pair-wise instruction.
75c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // If MergeForward is true, erase the first instruction and fold its
7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // operation into the second. If false, the reverse. Return the instruction
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // following the first instruction (which may change during processing).
784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // \p SExtIdx index of the result that must be extended for a paired load.
794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  // -1 means none, 0 means I, and 1 means Paired.
8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator
8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  mergePairedInsns(MachineBasicBlock::iterator I,
824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                   MachineBasicBlock::iterator Paired, bool MergeForward,
834c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                   int SExtIdx);
8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Scan the instruction list to find a base register update that can
8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // be combined with the current instruction (a load or store) using
8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // pre or post indexed addressing with writeback. Scan forwards.
8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator
8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                int Value);
9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Scan the instruction list to find a base register update that can
9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // be combined with the current instruction (a load or store) using
9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // pre or post indexed addressing with writeback. Scan backwards.
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator
9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Merge a pre-index base register update into a ld/st instruction.
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator
10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                        MachineBasicBlock::iterator Update);
10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Merge a post-index base register update into a ld/st instruction.
10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator
10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                         MachineBasicBlock::iterator Update);
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool optimizeBlock(MachineBasicBlock &MBB);
10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool runOnMachineFunction(MachineFunction &Fn) override;
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const char *getPassName() const override {
113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return "AArch64 load / store optimization pass";
11436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
11636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesprivate:
11736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int getMemSize(MachineInstr *MemMI);
11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines};
119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar AArch64LoadStoreOpt::ID = 0;
12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} // namespace
12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool isUnscaledLdst(unsigned Opc) {
12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (Opc) {
12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
12536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return false;
126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURSi:
12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURDi:
12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURQi:
13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURWi:
13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURXi:
13536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURSi:
13736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
138dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURDi:
13936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURQi:
14136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURWi:
14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURXi:
14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return true;
146ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDURSWi:
147ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return true;
14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Size in bytes of the data moved by an unscaled load or store
152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesint AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (MemMI->getOpcode()) {
15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    llvm_unreachable("Opcode has unknown size!");
156dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRSui:
157dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURSi:
15836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 4;
159dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRDui:
160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURDi:
16136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 8;
162dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRQui:
163dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURQi:
16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 16;
165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRWui:
166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURWi:
16736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 4;
168dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRXui:
169dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURXi:
17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 8;
171dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRSui:
172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURSi:
17336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 4;
174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRDui:
175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURDi:
17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 8;
177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRQui:
178dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURQi:
17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 16;
180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRWui:
181dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURWi:
18236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 4;
183dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRXui:
184dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURXi:
18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return 8;
186ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDRSWui:
187ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDURSWi:
188ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return 4;
18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic unsigned getMatchingNonSExtOpcode(unsigned Opc,
1934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                                         bool *IsValidLdStrOpc = nullptr) {
1944c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  if (IsValidLdStrOpc)
1954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    *IsValidLdStrOpc = true;
1964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  switch (Opc) {
1974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  default:
1984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    if (IsValidLdStrOpc)
1994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      *IsValidLdStrOpc = false;
2004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    return UINT_MAX;
2014c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STRDui:
2024c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STURDi:
2034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STRQui:
2044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STURQi:
2054c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STRWui:
2064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STURWi:
2074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STRXui:
2084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STURXi:
2094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRDui:
2104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURDi:
2114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRQui:
2124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURQi:
2134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRWui:
2144c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURWi:
2154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRXui:
2164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURXi:
2174c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STRSui:
2184c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::STURSi:
2194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRSui:
2204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURSi:
2214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    return Opc;
2224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDRSWui:
2234c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    return AArch64::LDRWui;
2244c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  case AArch64::LDURSWi:
2254c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    return AArch64::LDURWi;
2264c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  }
2274c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar}
2284c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar
22936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getMatchingPairOpcode(unsigned Opc) {
23036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (Opc) {
23136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
23236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    llvm_unreachable("Opcode has no pairwise equivalent!");
233dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRSui:
234dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURSi:
235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STPSi;
236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRDui:
237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURDi:
238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STPDi;
239dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRQui:
240dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURQi:
241dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STPQi;
242dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRWui:
243dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURWi:
244dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STPWi;
245dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRXui:
246dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STURXi:
247dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STPXi;
248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRSui:
249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURSi:
250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDPSi;
251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRDui:
252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURDi:
253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDPDi;
254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRQui:
255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURQi:
256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDPQi;
257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRWui:
258dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURWi:
259dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDPWi;
260dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRXui:
261dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDURXi:
262dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDPXi;
263ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDRSWui:
264ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDURSWi:
265ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return AArch64::LDPSWi;
26636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
26736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
26836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
26936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getPreIndexedOpcode(unsigned Opc) {
27036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (Opc) {
27136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
27236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    llvm_unreachable("Opcode has no pre-indexed equivalent!");
273c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::STRSui:
274c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::STRSpre;
275c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::STRDui:
276c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::STRDpre;
277c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::STRQui:
278c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::STRQpre;
279c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::STRWui:
280c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::STRWpre;
281c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::STRXui:
282c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::STRXpre;
283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::LDRSui:
284c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::LDRSpre;
285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::LDRDui:
286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::LDRDpre;
287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::LDRQui:
288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::LDRQpre;
289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::LDRWui:
290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::LDRWpre;
291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  case AArch64::LDRXui:
292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    return AArch64::LDRXpre;
293ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDRSWui:
294ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return AArch64::LDRSWpre;
29536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
29636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
29736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
29836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getPostIndexedOpcode(unsigned Opc) {
29936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (Opc) {
30036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    llvm_unreachable("Opcode has no post-indexed wise equivalent!");
302dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRSui:
303dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STRSpost;
304dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRDui:
305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STRDpost;
306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRQui:
307dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STRQpost;
308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRWui:
309dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STRWpost;
310dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::STRXui:
311dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::STRXpost;
312dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRSui:
313dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDRSpost;
314dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRDui:
315dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDRDpost;
316dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRQui:
317dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDRQpost;
318dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRWui:
319dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDRWpost;
320dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::LDRXui:
321dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return AArch64::LDRXpost;
322ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AArch64::LDRSWui:
323ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return AArch64::LDRSWpost;
32436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
32536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
32636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
32736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator
328dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
329dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                      MachineBasicBlock::iterator Paired,
3304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                                      bool MergeForward, int SExtIdx) {
33136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator NextI = I;
33236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ++NextI;
33336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // If NextI is the second of the two instructions to be merged, we need
33436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // to skip one further. Either way we merge will invalidate the iterator,
33536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // and we don't need to scan the new instruction, as it's a pairwise
33636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // instruction, which we're not considering for further action anyway.
33736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (NextI == Paired)
33836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++NextI;
33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  unsigned Opc =
3414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
3424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  bool IsUnscaled = isUnscaledLdst(Opc);
343dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  int OffsetStride =
344dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
34536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
3464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  unsigned NewOpc = getMatchingPairOpcode(Opc);
34736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Insert our new paired instruction after whichever of the paired
348c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // instructions MergeForward indicates.
349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Also based on MergeForward is from where we copy the base register operand
35136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // so we get the flags compatible with the input code.
35236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineOperand &BaseRegOp =
353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      MergeForward ? Paired->getOperand(1) : I->getOperand(1);
35436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
35536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Which register is Rt and which is Rt2 depends on the offset order.
35636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstr *RtMI, *Rt2MI;
35736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (I->getOperand(2).getImm() ==
35836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Paired->getOperand(2).getImm() + OffsetStride) {
35936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    RtMI = Paired;
36036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Rt2MI = I;
3614c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Here we swapped the assumption made for SExtIdx.
3624c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // I.e., we turn ldp I, Paired into ldp Paired, I.
3634c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Update the index accordingly.
3644c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    if (SExtIdx != -1)
3654c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      SExtIdx = (SExtIdx + 1) % 2;
36636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  } else {
36736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    RtMI = I;
36836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Rt2MI = Paired;
36936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
37036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Handle Unscaled
37136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int OffsetImm = RtMI->getOperand(2).getImm();
372dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (IsUnscaled && EnableAArch64UnscaledMemOp)
37336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    OffsetImm /= OffsetStride;
37436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
37536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Construct the new instruction.
37636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
37736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                    I->getDebugLoc(), TII->get(NewOpc))
37836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                .addOperand(RtMI->getOperand(0))
37936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                .addOperand(Rt2MI->getOperand(0))
38036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                .addOperand(BaseRegOp)
38136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                .addImm(OffsetImm);
38236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  (void)MIB;
38336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
38436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // FIXME: Do we need/want to copy the mem operands from the source
38536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        instructions? Probably. What uses them after this?
38636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
38736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n    ");
38836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(I->print(dbgs()));
38936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "    ");
39036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(Paired->print(dbgs()));
39136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "  with instruction:\n    ");
3924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar
3934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  if (SExtIdx != -1) {
3944c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Generate the sign extension for the proper result of the ldp.
3954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // I.e., with X1, that would be:
3964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // %W1<def> = KILL %W1, %X1<imp-def>
3974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // %X1<def> = SBFMXri %X1<kill>, 0, 31
3984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    MachineOperand &DstMO = MIB->getOperand(SExtIdx);
3994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Right now, DstMO has the extended register, since it comes from an
4004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // extended opcode.
4014c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    unsigned DstRegX = DstMO.getReg();
4024c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Get the W variant of that register.
4034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
4044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Update the result of LDP to use the W instead of the X variant.
4054c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DstMO.setReg(DstRegW);
4064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
4074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(dbgs() << "\n");
4084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Make the machine verifier happy by providing a definition for
4094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // the X register.
4104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Insert this definition right after the generated LDP, i.e., before
4114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // InsertionPoint.
4124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    MachineInstrBuilder MIBKill =
4134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
4144c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                TII->get(TargetOpcode::KILL), DstRegW)
4154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar            .addReg(DstRegW)
4164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar            .addReg(DstRegX, RegState::Define);
4174c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    MIBKill->getOperand(2).setImplicit();
4184c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    // Create the sign extension.
4194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    MachineInstrBuilder MIBSXTW =
4204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
4214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                TII->get(AArch64::SBFMXri), DstRegX)
4224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar            .addReg(DstRegX)
4234c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar            .addImm(0)
4244c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar            .addImm(31);
4254c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    (void)MIBSXTW;
4264c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(dbgs() << "  Extend operand:\n    ");
4274c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
4284c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(dbgs() << "\n");
4294c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  } else {
4304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(((MachineInstr *)MIB)->print(dbgs()));
4314c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    DEBUG(dbgs() << "\n");
4324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  }
43336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
43436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Erase the old instructions.
43536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  I->eraseFromParent();
43636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Paired->eraseFromParent();
43736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
43836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return NextI;
43936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
44036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// trackRegDefsUses - Remember what registers the specified instruction uses
44236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// and modifies.
44336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
44436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                             BitVector &UsedRegs,
44536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                             const TargetRegisterInfo *TRI) {
44636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
44736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineOperand &MO = MI->getOperand(i);
44836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MO.isRegMask())
44936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
45036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
45136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (!MO.isReg())
45236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
45336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned Reg = MO.getReg();
45436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MO.isDef()) {
45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
45636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ModifiedRegs.set(*AI);
45736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else {
45836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
45936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        UsedRegs.set(*AI);
46136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
46236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
46336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
46436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
/// inBoundsForPair - Return true if Offset fits the immediate range accepted
/// for a paired load/store, i.e. [-64, 63] in units of one element.
///
/// For scaled instructions (IsUnscaled == false) Offset is already an element
/// count and OffsetStride is ignored. For unscaled instructions Offset is a
/// byte offset and OffsetStride is the number of bytes per element; the byte
/// offset must be an exact multiple of the stride to be representable.
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
  if (IsUnscaled) {
    // Integer division truncates toward zero, so a byte offset that is not a
    // whole number of elements (e.g. -513 with a stride of 8) would otherwise
    // be rounded into range. Such an offset cannot be encoded at all.
    if (Offset % OffsetStride != 0)
      return false;
    // Convert the byte-offset used by unscaled into an "element" offset used
    // by the scaled pair load/store instructions.
    Offset /= OffsetStride;
  }
  return Offset >= -64 && Offset <= 63;
}
47736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
// Round Num up to the next multiple of PowOf2, which is expected to be a
// power of two. This works directly on signed ints so that callers do not
// need a C-style cast from uint64_t back to int, as they would when using
// RoundUpToAlignment from include/llvm/Support/MathExtras.h.
// FIXME: Move this function to include/MathExtras.h?
static int alignTo(int Num, int PowOf2) {
  // For a power of two, PowOf2 - 1 is a mask of the bits below the alignment.
  const int LowBits = PowOf2 - 1;
  return (Num + LowBits) & ~LowBits;
}
48536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
48636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// findMatchingInsn - Scan the instructions looking for a load/store that can
48736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// be combined with the current instruction into a load/store pair.
48836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator
489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
4904c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                                      bool &MergeForward, int &SExtIdx,
4914c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                                      unsigned Limit) {
49236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator E = I->getParent()->end();
49336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator MBBI = I;
49436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstr *FirstMI = I;
49536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ++MBBI;
49636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
49736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Opc = FirstMI->getOpcode();
498c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  bool MayLoad = FirstMI->mayLoad();
49936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool IsUnscaled = isUnscaledLdst(Opc);
50036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned Reg = FirstMI->getOperand(0).getReg();
50136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned BaseReg = FirstMI->getOperand(1).getReg();
50236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Offset = FirstMI->getOperand(2).getImm();
50336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
50436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Early exit if the first instruction modifies the base register.
50536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // e.g., ldr x0, [x0]
50636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Early exit if the offset if not possible to match. (6 bits of positive
50736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // range, plus allow an extra one in case we find a later insn that matches
50836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // with Offset-1
50936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (FirstMI->modifiesRegister(BaseReg, TRI))
51036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
51136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int OffsetStride =
512dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
51336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
51436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
51536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
51636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Track which registers have been modified and used between the first insn
51736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // (inclusive) and the second insn.
51836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  BitVector ModifiedRegs, UsedRegs;
51936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ModifiedRegs.resize(TRI->getNumRegs());
52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  UsedRegs.resize(TRI->getNumRegs());
52136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
52236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineInstr *MI = MBBI;
52336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
52436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // optimization by changing how far we scan.
52536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->isDebugValue())
52636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
52736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
52836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Now that we know this is a real instruction, count it.
52936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++Count;
53036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
5314c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    bool CanMergeOpc = Opc == MI->getOpcode();
5324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    SExtIdx = -1;
5334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    if (!CanMergeOpc) {
5344c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      bool IsValidLdStrOpc;
5354c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
5364c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      if (!IsValidLdStrOpc)
5374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        continue;
5384c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      // Opc will be the first instruction in the pair.
5394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
5404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
5414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    }
5424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar
5434c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    if (CanMergeOpc && MI->getOperand(2).isImm()) {
54436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // If we've found another instruction with the same opcode, check to see
54536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // if the base and offset are compatible with our starting instruction.
54636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // These instructions all have scaled immediate operands, so we just
54736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // check for +1/-1. Make sure to check the new instruction offset is
54836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // actually an immediate and not a symbolic reference destined for
54936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // a relocation.
55036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      //
55136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Pairwise instructions have a 7-bit signed offset field. Single insns
55236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // have a 12-bit unsigned offset field. To be a valid combine, the
55336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // final offset must be in range.
55436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      unsigned MIBaseReg = MI->getOperand(1).getReg();
55536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      int MIOffset = MI->getOperand(2).getImm();
55636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
55736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                   (Offset + OffsetStride == MIOffset))) {
55836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        int MinOffset = Offset < MIOffset ? Offset : MIOffset;
55936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If this is a volatile load/store that otherwise matched, stop looking
56036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // as something is going on that we don't have enough information to
56136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // safely transform. Similarly, stop if we see a hint to avoid pairs.
56236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI))
56336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          return E;
56436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If the resultant immediate offset of merging these instructions
56536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // is out of range for a pairwise instruction, bail and keep looking.
56636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
56736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
56836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
56936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          continue;
57036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
57136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If the alignment requirements of the paired (scaled) instruction
57236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // can't express the offset of the unscaled input, bail and keep
57336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // looking.
574dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        if (IsUnscaled && EnableAArch64UnscaledMemOp &&
57536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            (alignTo(MinOffset, OffsetStride) != MinOffset)) {
57636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
57736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          continue;
57836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
57936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If the destination register of the loads is the same register, bail
58036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // and keep looking. A load-pair instruction with both destination
58136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // registers the same is UNPREDICTABLE and will result in an exception.
582c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines        if (MayLoad && Reg == MI->getOperand(0).getReg()) {
58336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
58436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          continue;
58536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
58636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
58736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // If the Rt of the second instruction was not modified or used between
58836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // the two instructions, we can combine the second into the first.
58936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
59036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            !UsedRegs[MI->getOperand(0).getReg()]) {
591c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          MergeForward = false;
59236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          return MBBI;
59336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
59436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
59536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Likewise, if the Rt of the first instruction is not modified or used
59636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // between the two instructions, we can combine the first into the
59736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // second.
59836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
59936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            !UsedRegs[FirstMI->getOperand(0).getReg()]) {
600c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines          MergeForward = true;
60136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          return MBBI;
60236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        }
60336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Unable to combine these instructions due to interference in between.
60436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Keep looking.
60536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
60636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
60736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
60836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // If the instruction wasn't a matching load or store, but does (or can)
60936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // modify memory, stop searching, as we don't have alias analysis or
61036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // anything like that to tell us whether the access is tromping on the
61136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // locations we care about. The big one we want to catch is calls.
61236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    //
61336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // FIXME: Theoretically, we can do better than that for SP and FP based
61436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // references since we can effectively know where those are touching. It's
61536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // unclear if it's worth the extra code, though. Most paired instructions
61636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // will be sequential, perhaps with a few intervening non-memory related
61736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // instructions.
61836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->mayStore() || MI->isCall())
61936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return E;
62036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Likewise, if we're matching a store instruction, we don't want to
62136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // move across a load, as it may be reading the same location.
62236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (FirstMI->mayStore() && MI->mayLoad())
62336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return E;
62436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
62536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Update modified / uses register lists.
62636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
62736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
62836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Otherwise, if the base register is modified, we have no match, so
62936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // return early.
63036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (ModifiedRegs[BaseReg])
63136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return E;
63236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
63336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return E;
63436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
63536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
63636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator
637dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
638dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                           MachineBasicBlock::iterator Update) {
639dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  assert((Update->getOpcode() == AArch64::ADDXri ||
640dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Update->getOpcode() == AArch64::SUBXri) &&
64136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         "Unexpected base register update instruction to merge!");
64236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator NextI = I;
64336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Return the instruction following the merged instruction, which is
64436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // the instruction following our unmerged load. Unless that's the add/sub
64536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // instruction we're merging, in which case it's the one after that.
64636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (++NextI == Update)
64736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++NextI;
64836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
64936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Value = Update->getOperand(2).getImm();
650dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
65136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         "Can't merge 1 << 12 offset into pre-indexed load / store");
652dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (Update->getOpcode() == AArch64::SUBXri)
65336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Value = -Value;
65436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
65536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
65636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstrBuilder MIB =
65736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
658dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          .addOperand(Update->getOperand(0))
65936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addOperand(I->getOperand(0))
66036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addOperand(I->getOperand(1))
66136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addImm(Value);
66236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  (void)MIB;
66336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
66436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "Creating pre-indexed load/store.");
66536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "    Replacing instructions:\n    ");
66636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(I->print(dbgs()));
66736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "    ");
66836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(Update->print(dbgs()));
66936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "  with instruction:\n    ");
67036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
67136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "\n");
67236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
67336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Erase the old instructions for the block.
67436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  I->eraseFromParent();
67536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Update->eraseFromParent();
67636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
67736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return NextI;
67836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
67936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
680dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
681dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
682dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  assert((Update->getOpcode() == AArch64::ADDXri ||
683dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Update->getOpcode() == AArch64::SUBXri) &&
68436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         "Unexpected base register update instruction to merge!");
68536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator NextI = I;
68636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Return the instruction following the merged instruction, which is
68736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // the instruction following our unmerged load. Unless that's the add/sub
68836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // instruction we're merging, in which case it's the one after that.
68936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (++NextI == Update)
69036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++NextI;
69136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
69236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Value = Update->getOperand(2).getImm();
693dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
69436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         "Can't merge 1 << 12 offset into post-indexed load / store");
695dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (Update->getOpcode() == AArch64::SUBXri)
69636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Value = -Value;
69736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
69836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
69936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstrBuilder MIB =
70036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
701dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          .addOperand(Update->getOperand(0))
70236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addOperand(I->getOperand(0))
70336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addOperand(I->getOperand(1))
70436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          .addImm(Value);
70536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  (void)MIB;
70636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
70736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "Creating post-indexed load/store.");
70836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "    Replacing instructions:\n    ");
70936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(I->print(dbgs()));
71036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "    ");
71136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(Update->print(dbgs()));
71236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "  with instruction:\n    ");
71336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(((MachineInstr *)MIB)->print(dbgs()));
71436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DEBUG(dbgs() << "\n");
71536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
71636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Erase the old instructions for the block.
71736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  I->eraseFromParent();
71836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  Update->eraseFromParent();
71936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
72036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return NextI;
72136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
72236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
72336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
72436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 int Offset) {
72536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  switch (MI->getOpcode()) {
72636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  default:
72736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    break;
728dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::SUBXri:
72936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Negate the offset for a SUB instruction.
73036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Offset *= -1;
73136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // FALLTHROUGH
732dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AArch64::ADDXri:
73336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Make sure it's a vanilla immediate operand, not a relocation or
73436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // anything else we can't handle.
73536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (!MI->getOperand(2).isImm())
73636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
73736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Watch out for 1 << 12 shifted value.
738dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
73936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
74036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // If the instruction has the base register as source and dest and the
74136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // immediate will fit in a signed 9-bit integer, then we have a match.
74236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->getOperand(0).getReg() == BaseReg &&
74336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MI->getOperand(1).getReg() == BaseReg &&
74436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MI->getOperand(2).getImm() <= 255 &&
74536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MI->getOperand(2).getImm() >= -256) {
74636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // If we have a non-zero Offset, we check that it matches the amount
74736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // we're adding to the register.
74836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (!Offset || Offset == MI->getOperand(2).getImm())
74936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        return true;
75036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
75136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    break;
75236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
75336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return false;
75436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
75536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
756dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
757dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineBasicBlock::iterator I, unsigned Limit, int Value) {
75836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator E = I->getParent()->end();
75936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstr *MemMI = I;
76036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator MBBI = I;
76136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const MachineFunction &MF = *MemMI->getParent()->getParent();
76236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
76336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned DestReg = MemMI->getOperand(0).getReg();
76436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned BaseReg = MemMI->getOperand(1).getReg();
76536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Offset = MemMI->getOperand(2).getImm() *
76636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines               TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
76736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
76836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // If the base register overlaps the destination register, we can't
76936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // merge the update.
77036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
77136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
77236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
77336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Scan forward looking for post-index opportunities.
77436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Updating instructions can't be formed if the memory insn already
77536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // has an offset other than the value we're looking for.
77636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Offset != Value)
77736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
77836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
77936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Track which registers have been modified and used between the first insn
78036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // (inclusive) and the second insn.
78136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  BitVector ModifiedRegs, UsedRegs;
78236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ModifiedRegs.resize(TRI->getNumRegs());
78336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  UsedRegs.resize(TRI->getNumRegs());
78436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ++MBBI;
78536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (unsigned Count = 0; MBBI != E; ++MBBI) {
78636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineInstr *MI = MBBI;
78736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
78836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // optimization by changing how far we scan.
78936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->isDebugValue())
79036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
79136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
79236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Now that we know this is a real instruction, count it.
79336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++Count;
79436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
79536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // If we found a match, return it.
79636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (isMatchingUpdateInsn(MI, BaseReg, Value))
79736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return MBBI;
79836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
79936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Update the status of what the instruction clobbered and used.
80036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
80136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
80236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Otherwise, if the base register is used or modified, we have no match, so
80336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // return early.
80436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
80536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return E;
80636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
80736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return E;
80836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
80936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
810dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
811dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineBasicBlock::iterator I, unsigned Limit) {
81236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator B = I->getParent()->begin();
81336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator E = I->getParent()->end();
81436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineInstr *MemMI = I;
81536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MachineBasicBlock::iterator MBBI = I;
81636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const MachineFunction &MF = *MemMI->getParent()->getParent();
81736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
81836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned DestReg = MemMI->getOperand(0).getReg();
81936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned BaseReg = MemMI->getOperand(1).getReg();
82036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  int Offset = MemMI->getOperand(2).getImm();
82136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
82236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
82336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // If the load/store is the first instruction in the block, there's obviously
82436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // not any matching update. Ditto if the memory offset isn't zero.
82536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (MBBI == B || Offset != 0)
82636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
82736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // If the base register overlaps the destination register, we can't
82836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // merge the update.
82936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
83036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return E;
83136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
83236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Track which registers have been modified and used between the first insn
83336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // (inclusive) and the second insn.
83436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  BitVector ModifiedRegs, UsedRegs;
83536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  ModifiedRegs.resize(TRI->getNumRegs());
83636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  UsedRegs.resize(TRI->getNumRegs());
83736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  --MBBI;
83836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (unsigned Count = 0; MBBI != B; --MBBI) {
83936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineInstr *MI = MBBI;
84036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
84136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // optimization by changing how far we scan.
84236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (MI->isDebugValue())
84336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      continue;
84436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
84536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Now that we know this is a real instruction, count it.
84636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    ++Count;
84736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
84836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // If we found a match, return it.
84936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
85036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return MBBI;
85136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
85236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Update the status of what the instruction clobbered and used.
85336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
85436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
85536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Otherwise, if the base register is used or modified, we have no match, so
85636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // return early.
85736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg])
85836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      return E;
85936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
86036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return E;
86136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
86236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
863dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
86436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool Modified = false;
86536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Two tranformations to do here:
86636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // 1) Find loads and stores that can be merged into a single load or store
86736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //    pair instruction.
86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //      e.g.,
86936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ldr x0, [x2]
87036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ldr x1, [x2, #8]
87136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ; becomes
87236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ldp x0, x1, [x2]
87336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // 2) Find base register updates that can be merged into the load or store
87436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //    as a base-reg writeback.
87536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //      e.g.,
87636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ldr x0, [x2]
87736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        add x2, x2, #4
87836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ; becomes
87936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  //        ldr x0, [x2], #4
88036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
88136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
88236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines       MBBI != E;) {
88336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineInstr *MI = MBBI;
88436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    switch (MI->getOpcode()) {
88536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    default:
88636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Just move on to the next instruction.
88736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ++MBBI;
88836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
889dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRSui:
890dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRDui:
891dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRQui:
892dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRXui:
893dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRWui:
894dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRSui:
895dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRDui:
896dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRQui:
897dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRXui:
898dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRWui:
899ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    case AArch64::LDRSWui:
90036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // do the unscaled versions as well
901dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURSi:
902dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURDi:
903dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURQi:
904dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURWi:
905dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURXi:
906dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURSi:
907dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURDi:
908dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURQi:
909dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURWi:
910ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    case AArch64::LDURXi:
911ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    case AArch64::LDURSWi: {
91236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // If this is a volatile load/store, don't mess with it.
91336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (MI->hasOrderedMemoryRef()) {
91436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++MBBI;
91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Make sure this is a reg+imm (as opposed to an address reloc).
91836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (!MI->getOperand(2).isImm()) {
91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++MBBI;
92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
92136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
92236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Check if this load/store has a hint to avoid pair formation.
923dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
92436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (TII->isLdStPairSuppressed(MI)) {
92536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++MBBI;
92636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
92736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
92836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Look ahead up to ScanLimit instructions for a pairable instruction.
929c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines      bool MergeForward = false;
9304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      int SExtIdx = -1;
93136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MachineBasicBlock::iterator Paired =
9324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar          findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
93336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (Paired != E) {
93436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Merge the loads into a pair. Keeping the iterator straight is a
93536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // pain, so we let the merge routine tell us what the next instruction
93636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // is after it's done mucking about.
9374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar        MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
93836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
93936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Modified = true;
94036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++NumPairCreated;
94136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (isUnscaledLdst(MI->getOpcode()))
94236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          ++NumUnscaledPairCreated;
94336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
94436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
94536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ++MBBI;
94636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
94836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // FIXME: Do the other instructions.
94936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
95036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
95136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
95236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
95336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines       MBBI != E;) {
95436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MachineInstr *MI = MBBI;
95536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // Do update merging. It's simpler to keep this separate from the above
95636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // switch, though not strictly necessary.
95736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    int Opc = MI->getOpcode();
95836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    switch (Opc) {
95936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    default:
96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Just move on to the next instruction.
96136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ++MBBI;
96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
963dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRSui:
964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRDui:
965dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRQui:
966dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRXui:
967dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STRWui:
968dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRSui:
969dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRDui:
970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRQui:
971dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRXui:
972dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDRWui:
97336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    // do the unscaled versions as well
974dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURSi:
975dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURDi:
976dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURQi:
977dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURWi:
978dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::STURXi:
979dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURSi:
980dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURDi:
981dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURQi:
982dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURWi:
983dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    case AArch64::LDURXi: {
98436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Make sure this is a reg+imm (as opposed to an address reloc).
98536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (!MI->getOperand(2).isImm()) {
98636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++MBBI;
98736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
98836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
98936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Look ahead up to ScanLimit instructions for a mergable instruction.
99036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      MachineBasicBlock::iterator Update =
99136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
99236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (Update != E) {
99336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Merge the update into the ld/st.
99436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MBBI = mergePostIdxUpdateInsn(MBBI, Update);
99536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Modified = true;
99636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++NumPostFolded;
99736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
99836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
99936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Don't know how to handle pre/post-index versions, so move to the next
100036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // instruction.
100136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (isUnscaledLdst(Opc)) {
100236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++MBBI;
100336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
100436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
100536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
100636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Look back to try to find a pre-index instruction. For example,
100736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // add x0, x0, #8
100836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // ldr x1, [x0]
100936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      //   merged into:
101036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // ldr x1, [x0, #8]!
101136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
101236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (Update != E) {
101336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Merge the update into the ld/st.
101436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
101536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Modified = true;
101636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++NumPreFolded;
101736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
101836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
101936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
102036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Look forward to try to find a post-index instruction. For example,
102136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // ldr x1, [x0, #64]
102236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // add x0, x0, #64
102336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      //   merged into:
1024dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // ldr x1, [x0, #64]!
102536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
102636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // The immediate in the load/store is scaled by the size of the register
102736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // being loaded. The immediate in the add we're looking for,
102836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // however, is not, so adjust here.
102936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      int Value = MI->getOperand(2).getImm() *
103036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                  TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
103136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                      ->getSize();
103236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
103336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (Update != E) {
103436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        // Merge the update into the ld/st.
103536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        MBBI = mergePreIdxUpdateInsn(MBBI, Update);
103636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Modified = true;
103736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        ++NumPreFolded;
103836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
103936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
104036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
104136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // Nothing found. Just move to the next instruction.
104236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      ++MBBI;
104336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      break;
104436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
104536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      // FIXME: Do the other instructions.
104636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
104736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
104836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
104936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return Modified;
105036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
105136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
1052dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
1054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  TRI = Fn.getSubtarget().getRegisterInfo();
105536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
105636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  bool Modified = false;
105736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (auto &MBB : Fn)
105836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Modified |= optimizeBlock(MBB);
105936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
106036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return Modified;
106136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
106236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
106336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
106436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// loads and stores near one another?
106536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
106636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
106736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// optimization pass.
1068dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesFunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
1069dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return new AArch64LoadStoreOpt();
107036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
1071