1dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// 236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// 336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// The LLVM Compiler Infrastructure 436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// 536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// This file is distributed under the University of Illinois Open Source 636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// License. See LICENSE.TXT for details. 736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// 836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//===----------------------------------------------------------------------===// 936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// 1036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// This file contains a pass that performs load / store related peephole 1136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// optimizations. This pass should be run after register allocation. 1236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// 1336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines//===----------------------------------------------------------------------===// 1436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 15dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "AArch64InstrInfo.h" 1637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64Subtarget.h" 17dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "MCTargetDesc/AArch64AddressingModes.h" 1836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/ADT/BitVector.h" 1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/ADT/Statistic.h" 2036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineBasicBlock.h" 2136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineFunctionPass.h" 2236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineInstr.h" 2336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineInstrBuilder.h" 2436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/CommandLine.h" 2536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/Debug.h" 2636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/ErrorHandling.h" 2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/Support/raw_ostream.h" 2837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetInstrInfo.h" 2937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetMachine.h" 3037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Target/TargetRegisterInfo.h" 3136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesusing namespace llvm; 3236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "aarch64-ldst-opt" 34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine 3636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// load / store instructions to form ldp / stp instructions. 3736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); 3936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPostFolded, "Number of post-index updates folded"); 4036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumPreFolded, "Number of pre-index updates folded"); 4136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSTATISTIC(NumUnscaledPairCreated, 4236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "Number of load/store from unscaled generated"); 4336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 44c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", 45c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines cl::init(20), cl::Hidden); 4636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 4736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Place holder while testing unscaled load/store combining 48c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic cl::opt<bool> EnableAArch64UnscaledMemOp( 49c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines "aarch64-unscaled-mem-op", cl::Hidden, 50c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); 5136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesnamespace { 53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstruct AArch64LoadStoreOpt : public MachineFunctionPass { 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static char ID; 55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 57dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const AArch64InstrInfo *TII; 5836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterInfo *TRI; 5936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 6036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Scan the instructions looking for a load/store that can be combined 6136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // with the current instruction into a load/store pair. 6236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Return the matching instruction if one is found, else MBB->end(). 63c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // If a matching instruction is found, MergeForward is set to true if the 6436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // merge is to remove the first instruction and replace the second with 6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // a pair-wise insn, and false if the reverse is true. 664c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // \p SExtIdx[out] gives the index of the result of the load pair that 674c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // must be extended. The value of SExtIdx assumes that the paired load 684c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // produces the value in this order: (I, returned iterator), i.e., 694c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // -1 means no value has to be extended, 0 means I, and 1 means the 704c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // returned iterator. 7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, 724c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool &MergeForward, int &SExtIdx, 7336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Limit); 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge the two instructions indicated into a single pair-wise instruction. 75c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // If MergeForward is true, erase the first instruction and fold its 7636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // operation into the second. If false, the reverse. Return the instruction 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // following the first instruction (which may change during processing). 784c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // \p SExtIdx index of the result that must be extended for a paired load. 794c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // -1 means none, 0 means I, and 1 means Paired. 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines mergePairedInsns(MachineBasicBlock::iterator I, 824c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MachineBasicBlock::iterator Paired, bool MergeForward, 834c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar int SExtIdx); 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Scan the instruction list to find a base register update that can 8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // be combined with the current instruction (a load or store) using 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // pre or post indexed addressing with writeback. Scan forwards. 8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator 8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, 9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Value); 9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Scan the instruction list to find a base register update that can 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // be combined with the current instruction (a load or store) using 9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // pre or post indexed addressing with writeback. Scan backwards. 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge a pre-index base register update into a ld/st instruction. 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator Update); 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge a post-index base register update into a ld/st instruction. 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator Update); 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 10836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool optimizeBlock(MachineBasicBlock &MBB); 10936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool runOnMachineFunction(MachineFunction &Fn) override; 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const char *getPassName() const override { 113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return "AArch64 load / store optimization pass"; 11436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 11536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 11636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesprivate: 11736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int getMemSize(MachineInstr *MemMI); 11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}; 119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar AArch64LoadStoreOpt::ID = 0; 12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} // namespace 12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool isUnscaledLdst(unsigned Opc) { 12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Opc) { 12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 12536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURSi: 12736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURDi: 12936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURQi: 13136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURWi: 13336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURXi: 13536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURSi: 13736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 138dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURDi: 13936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 140dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURQi: 14136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURWi: 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURXi: 14536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 146ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDURSWi: 147ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return true; 14836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 14936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 15036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 15136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Size in bytes of the data moved by an unscaled load or store 152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesint AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { 15336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (MemMI->getOpcode()) { 15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 155c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines llvm_unreachable("Opcode has unknown size!"); 156dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRSui: 157dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURSi: 15836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 4; 159dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRDui: 160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURDi: 16136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 8; 162dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRQui: 163dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURQi: 16436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 16; 165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRWui: 166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURWi: 16736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 4; 168dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRXui: 169dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURXi: 17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 8; 171dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRSui: 172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURSi: 17336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 4; 174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRDui: 175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURDi: 17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 8; 177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRQui: 178dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURQi: 17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 16; 180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRWui: 181dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURWi: 18236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 4; 183dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRXui: 184dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURXi: 18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return 8; 186ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDRSWui: 187ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDURSWi: 188ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return 4; 18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarstatic unsigned getMatchingNonSExtOpcode(unsigned Opc, 1934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool *IsValidLdStrOpc = nullptr) { 1944c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (IsValidLdStrOpc) 1954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar *IsValidLdStrOpc = true; 1964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar switch (Opc) { 1974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar default: 1984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (IsValidLdStrOpc) 1994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar *IsValidLdStrOpc = false; 2004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return UINT_MAX; 2014c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STRDui: 2024c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STURDi: 2034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STRQui: 2044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STURQi: 2054c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STRWui: 2064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STURWi: 2074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STRXui: 2084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STURXi: 2094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRDui: 2104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURDi: 2114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRQui: 2124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURQi: 2134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRWui: 2144c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURWi: 2154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRXui: 2164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURXi: 2174c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STRSui: 2184c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::STURSi: 2194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRSui: 2204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURSi: 2214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return Opc; 2224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDRSWui: 2234c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return AArch64::LDRWui; 2244c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AArch64::LDURSWi: 2254c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return AArch64::LDURWi; 2264c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 2274c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar} 2284c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 22936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getMatchingPairOpcode(unsigned Opc) { 23036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Opc) { 23136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 23236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Opcode has no pairwise equivalent!"); 233dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRSui: 234dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURSi: 235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STPSi; 236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRDui: 237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURDi: 238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STPDi; 239dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRQui: 240dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURQi: 241dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STPQi; 242dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRWui: 243dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURWi: 244dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STPWi; 245dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRXui: 246dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURXi: 247dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STPXi; 248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRSui: 249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURSi: 250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDPSi; 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRDui: 252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURDi: 253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDPDi; 254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRQui: 255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURQi: 256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDPQi; 257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRWui: 258dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURWi: 259dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDPWi; 260dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRXui: 261dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURXi: 262dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDPXi; 263ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDRSWui: 264ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDURSWi: 265ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AArch64::LDPSWi; 26636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 26736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 26836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 26936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getPreIndexedOpcode(unsigned Opc) { 27036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Opc) { 27136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 27236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Opcode has no pre-indexed equivalent!"); 273c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::STRSui: 274c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::STRSpre; 275c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::STRDui: 276c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::STRDpre; 277c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::STRQui: 278c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::STRQpre; 279c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::STRWui: 280c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::STRWpre; 281c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::STRXui: 282c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::STRXpre; 283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::LDRSui: 284c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::LDRSpre; 285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::LDRDui: 286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::LDRDpre; 287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::LDRQui: 288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::LDRQpre; 289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::LDRWui: 290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::LDRWpre; 291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AArch64::LDRXui: 292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return AArch64::LDRXpre; 293ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDRSWui: 294ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AArch64::LDRSWpre; 29536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 29636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 29736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 29836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic unsigned getPostIndexedOpcode(unsigned Opc) { 29936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Opc) { 30036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Opcode has no post-indexed wise equivalent!"); 302dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRSui: 303dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STRSpost; 304dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRDui: 305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STRDpost; 306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRQui: 307dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STRQpost; 308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRWui: 309dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STRWpost; 310dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRXui: 311dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::STRXpost; 312dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRSui: 313dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDRSpost; 314dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRDui: 315dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDRDpost; 316dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRQui: 317dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDRQpost; 318dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRWui: 319dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDRWpost; 320dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRXui: 321dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AArch64::LDRXpost; 322ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDRSWui: 323ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AArch64::LDRSWpost; 32436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 32536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 32636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 32736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator 328dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, 329dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator Paired, 3304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool MergeForward, int SExtIdx) { 33136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator NextI = I; 33236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NextI; 33336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If NextI is the second of the two instructions to be merged, we need 33436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // to skip one further. Either way we merge will invalidate the iterator, 33536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // and we don't need to scan the new instruction, as it's a pairwise 33636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instruction, which we're not considering for further action anyway. 33736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (NextI == Paired) 33836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NextI; 33936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned Opc = 3414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); 3424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool IsUnscaled = isUnscaledLdst(Opc); 343dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int OffsetStride = 344dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; 34536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 3464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned NewOpc = getMatchingPairOpcode(Opc); 34736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Insert our new paired instruction after whichever of the paired 348c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // instructions MergeForward indicates. 349c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; 350c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Also based on MergeForward is from where we copy the base register operand 35136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // so we get the flags compatible with the input code. 35236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand &BaseRegOp = 353c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MergeForward ? Paired->getOperand(1) : I->getOperand(1); 35436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 35536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Which register is Rt and which is Rt2 depends on the offset order. 35636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *RtMI, *Rt2MI; 35736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (I->getOperand(2).getImm() == 35836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Paired->getOperand(2).getImm() + OffsetStride) { 35936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines RtMI = Paired; 36036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Rt2MI = I; 3614c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Here we swapped the assumption made for SExtIdx. 3624c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // I.e., we turn ldp I, Paired into ldp Paired, I. 3634c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Update the index accordingly. 3644c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (SExtIdx != -1) 3654c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SExtIdx = (SExtIdx + 1) % 2; 36636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 36736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines RtMI = I; 36836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Rt2MI = Paired; 36936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 37036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Handle Unscaled 37136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int OffsetImm = RtMI->getOperand(2).getImm(); 372dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsUnscaled && EnableAArch64UnscaledMemOp) 37336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OffsetImm /= OffsetStride; 37436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 37536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Construct the new instruction. 37636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, 37736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I->getDebugLoc(), TII->get(NewOpc)) 37836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(RtMI->getOperand(0)) 37936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(Rt2MI->getOperand(0)) 38036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(BaseRegOp) 38136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(OffsetImm); 38236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (void)MIB; 38336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 38436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FIXME: Do we need/want to copy the mem operands from the source 38536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instructions? Probably. What uses them after this? 38636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 38736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); 38836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(I->print(dbgs())); 38936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " "); 39036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(Paired->print(dbgs())); 39136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " with instruction:\n "); 3924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 3934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (SExtIdx != -1) { 3944c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Generate the sign extension for the proper result of the ldp. 3954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // I.e., with X1, that would be: 3964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // %W1<def> = KILL %W1, %X1<imp-def> 3974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // %X1<def> = SBFMXri %X1<kill>, 0, 31 3984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MachineOperand &DstMO = MIB->getOperand(SExtIdx); 3994c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Right now, DstMO has the extended register, since it comes from an 4004c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // extended opcode. 4014c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned DstRegX = DstMO.getReg(); 4024c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Get the W variant of that register. 4034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32); 4044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Update the result of LDP to use the W instead of the X variant. 4054c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DstMO.setReg(DstRegW); 4064c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(((MachineInstr *)MIB)->print(dbgs())); 4074c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(dbgs() << "\n"); 4084c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Make the machine verifier happy by providing a definition for 4094c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // the X register. 4104c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Insert this definition right after the generated LDP, i.e., before 4114c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // InsertionPoint. 4124c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MachineInstrBuilder MIBKill = 4134c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 4144c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar TII->get(TargetOpcode::KILL), DstRegW) 4154c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar .addReg(DstRegW) 4164c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar .addReg(DstRegX, RegState::Define); 4174c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MIBKill->getOperand(2).setImplicit(); 4184c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Create the sign extension. 4194c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MachineInstrBuilder MIBSXTW = 4204c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), 4214c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar TII->get(AArch64::SBFMXri), DstRegX) 4224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar .addReg(DstRegX) 4234c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar .addImm(0) 4244c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar .addImm(31); 4254c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar (void)MIBSXTW; 4264c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(dbgs() << " Extend operand:\n "); 4274c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs())); 4284c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(dbgs() << "\n"); 4294c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } else { 4304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(((MachineInstr *)MIB)->print(dbgs())); 4314c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar DEBUG(dbgs() << "\n"); 4324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 43336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 43436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Erase the old instructions. 43536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I->eraseFromParent(); 43636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Paired->eraseFromParent(); 43736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 43836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return NextI; 43936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 44036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 44136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// trackRegDefsUses - Remember what registers the specified instruction uses 44236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// and modifies. 44336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, 44436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BitVector &UsedRegs, 44536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterInfo *TRI) { 44636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 44736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand &MO = MI->getOperand(i); 44836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MO.isRegMask()) 44936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ModifiedRegs.setBitsNotInMask(MO.getRegMask()); 45036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 45136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!MO.isReg()) 45236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 45336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Reg = MO.getReg(); 45436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MO.isDef()) { 45536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 45636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ModifiedRegs.set(*AI); 45736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 45836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert(MO.isUse() && "Reg operand not a def and not a use?!?"); 45936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) 46036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UsedRegs.set(*AI); 46136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 46236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 46336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 46436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 46536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { 46636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!IsUnscaled && (Offset > 63 || Offset < -64)) 46736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 46836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (IsUnscaled) { 46936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Convert the byte-offset used by unscaled into an "element" offset used 47036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // by the scaled pair load/store instructions. 471c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines int ElemOffset = Offset / OffsetStride; 472c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (ElemOffset > 63 || ElemOffset < -64) 47336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 47436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 47536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 47636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 47736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 47836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// Do alignment, specialized to power of 2 and for signed ints, 47936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// avoiding having to do a C-style cast from uint_64t to int when 48036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. 48136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// FIXME: Move this function to include/MathExtras.h? 48236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic int alignTo(int Num, int PowOf2) { 48336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return (Num + PowOf2 - 1) & ~(PowOf2 - 1); 48436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 48536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 48636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// findMatchingInsn - Scan the instructions looking for a load/store that can 48736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// be combined with the current instruction into a load/store pair. 48836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator 489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, 4904c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool &MergeForward, int &SExtIdx, 4914c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned Limit) { 49236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator E = I->getParent()->end(); 49336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MBBI = I; 49436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *FirstMI = I; 49536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 49636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 49736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Opc = FirstMI->getOpcode(); 498c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines bool MayLoad = FirstMI->mayLoad(); 49936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool IsUnscaled = isUnscaledLdst(Opc); 50036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Reg = FirstMI->getOperand(0).getReg(); 50136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned BaseReg = FirstMI->getOperand(1).getReg(); 50236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Offset = FirstMI->getOperand(2).getImm(); 50336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 50436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Early exit if the first instruction modifies the base register. 50536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // e.g., ldr x0, [x0] 50636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Early exit if the offset if not possible to match. (6 bits of positive 50736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // range, plus allow an extra one in case we find a later insn that matches 50836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // with Offset-1 50936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (FirstMI->modifiesRegister(BaseReg, TRI)) 51036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 51136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int OffsetStride = 512dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; 51336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) 51436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 51536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 51636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Track which registers have been modified and used between the first insn 51736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // (inclusive) and the second insn. 51836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BitVector ModifiedRegs, UsedRegs; 51936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ModifiedRegs.resize(TRI->getNumRegs()); 52036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UsedRegs.resize(TRI->getNumRegs()); 52136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { 52236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MI = MBBI; 52336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Skip DBG_VALUE instructions. Otherwise debug info can affect the 52436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // optimization by changing how far we scan. 52536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->isDebugValue()) 52636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 52736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 52836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Now that we know this is a real instruction, count it. 52936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++Count; 53036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5314c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool CanMergeOpc = Opc == MI->getOpcode(); 5324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SExtIdx = -1; 5334c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!CanMergeOpc) { 5344c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar bool IsValidLdStrOpc; 5354c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); 5364c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!IsValidLdStrOpc) 5374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar continue; 5384c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar // Opc will be the first instruction in the pair. 5394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0; 5404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); 5414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 5424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 5434c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (CanMergeOpc && MI->getOperand(2).isImm()) { 54436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If we've found another instruction with the same opcode, check to see 54536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // if the base and offset are compatible with our starting instruction. 54636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // These instructions all have scaled immediate operands, so we just 54736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // check for +1/-1. Make sure to check the new instruction offset is 54836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // actually an immediate and not a symbolic reference destined for 54936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // a relocation. 55036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 55136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Pairwise instructions have a 7-bit signed offset field. Single insns 55236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // have a 12-bit unsigned offset field. To be a valid combine, the 55336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // final offset must be in range. 55436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned MIBaseReg = MI->getOperand(1).getReg(); 55536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int MIOffset = MI->getOperand(2).getImm(); 55636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || 55736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (Offset + OffsetStride == MIOffset))) { 55836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int MinOffset = Offset < MIOffset ? Offset : MIOffset; 55936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If this is a volatile load/store that otherwise matched, stop looking 56036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // as something is going on that we don't have enough information to 56136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // safely transform. Similarly, stop if we see a hint to avoid pairs. 56236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) 56336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 56436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the resultant immediate offset of merging these instructions 56536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // is out of range for a pairwise instruction, bail and keep looking. 56636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); 56736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { 56836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 56936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 57036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 57136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the alignment requirements of the paired (scaled) instruction 57236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // can't express the offset of the unscaled input, bail and keep 57336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // looking. 574dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsUnscaled && EnableAArch64UnscaledMemOp && 57536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (alignTo(MinOffset, OffsetStride) != MinOffset)) { 57636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 57736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 57836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 57936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the destination register of the loads is the same register, bail 58036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // and keep looking. A load-pair instruction with both destination 58136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // registers the same is UNPREDICTABLE and will result in an exception. 582c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (MayLoad && Reg == MI->getOperand(0).getReg()) { 58336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 58436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 58536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 58636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 58736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the Rt of the second instruction was not modified or used between 58836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // the two instructions, we can combine the second into the first. 58936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!ModifiedRegs[MI->getOperand(0).getReg()] && 59036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines !UsedRegs[MI->getOperand(0).getReg()]) { 591c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MergeForward = false; 59236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MBBI; 59336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 59436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 59536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Likewise, if the Rt of the first instruction is not modified or used 59636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // between the two instructions, we can combine the first into the 59736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // second. 59836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && 59936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines !UsedRegs[FirstMI->getOperand(0).getReg()]) { 600c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MergeForward = true; 60136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MBBI; 60236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 60336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Unable to combine these instructions due to interference in between. 60436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Keep looking. 60536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 60636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 60736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 60836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the instruction wasn't a matching load or store, but does (or can) 60936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // modify memory, stop searching, as we don't have alias analysis or 61036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // anything like that to tell us whether the access is tromping on the 61136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // locations we care about. The big one we want to catch is calls. 61236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 61336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FIXME: Theoretically, we can do better than that for SP and FP based 61436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // references since we can effectively know where those are touching. It's 61536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // unclear if it's worth the extra code, though. Most paired instructions 61636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // will be sequential, perhaps with a few intervening non-memory related 61736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instructions. 61836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->mayStore() || MI->isCall()) 61936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 62036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Likewise, if we're matching a store instruction, we don't want to 62136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // move across a load, as it may be reading the same location. 62236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (FirstMI->mayStore() && MI->mayLoad()) 62336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 62436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 62536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Update modified / uses register lists. 62636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 62736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 62836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Otherwise, if the base register is modified, we have no match, so 62936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // return early. 63036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ModifiedRegs[BaseReg]) 63136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 63236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 63336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 63436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 63536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 63636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineBasicBlock::iterator 637dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesAArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, 638dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator Update) { 639dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert((Update->getOpcode() == AArch64::ADDXri || 640dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Update->getOpcode() == AArch64::SUBXri) && 64136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "Unexpected base register update instruction to merge!"); 64236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator NextI = I; 64336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Return the instruction following the merged instruction, which is 64436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // the instruction following our unmerged load. Unless that's the add/sub 64536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instruction we're merging, in which case it's the one after that. 64636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (++NextI == Update) 64736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NextI; 64836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 64936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Value = Update->getOperand(2).getImm(); 650dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 65136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "Can't merge 1 << 12 offset into pre-indexed load / store"); 652dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Update->getOpcode() == AArch64::SUBXri) 65336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Value = -Value; 65436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 65536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); 65636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstrBuilder MIB = 65736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 658dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addOperand(Update->getOperand(0)) 65936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(I->getOperand(0)) 66036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(I->getOperand(1)) 66136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(Value); 66236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (void)MIB; 66336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 66436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Creating pre-indexed load/store."); 66536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " Replacing instructions:\n "); 66636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(I->print(dbgs())); 66736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " "); 66836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(Update->print(dbgs())); 66936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " with instruction:\n "); 67036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(((MachineInstr *)MIB)->print(dbgs())); 67136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "\n"); 67236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 67336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Erase the old instructions for the block. 67436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I->eraseFromParent(); 67536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Update->eraseFromParent(); 67636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 67736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return NextI; 67836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 67936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 680dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( 681dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { 682dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert((Update->getOpcode() == AArch64::ADDXri || 683dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Update->getOpcode() == AArch64::SUBXri) && 68436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "Unexpected base register update instruction to merge!"); 68536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator NextI = I; 68636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Return the instruction following the merged instruction, which is 68736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // the instruction following our unmerged load. Unless that's the add/sub 68836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instruction we're merging, in which case it's the one after that. 68936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (++NextI == Update) 69036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NextI; 69136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 69236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Value = Update->getOperand(2).getImm(); 693dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && 69436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines "Can't merge 1 << 12 offset into post-indexed load / store"); 695dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Update->getOpcode() == AArch64::SUBXri) 69636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Value = -Value; 69736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 69836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); 69936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstrBuilder MIB = 70036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) 701dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addOperand(Update->getOperand(0)) 70236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(I->getOperand(0)) 70336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addOperand(I->getOperand(1)) 70436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(Value); 70536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (void)MIB; 70636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 70736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "Creating post-indexed load/store."); 70836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " Replacing instructions:\n "); 70936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(I->print(dbgs())); 71036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " "); 71136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(Update->print(dbgs())); 71236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << " with instruction:\n "); 71336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(((MachineInstr *)MIB)->print(dbgs())); 71436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DEBUG(dbgs() << "\n"); 71536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 71636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Erase the old instructions for the block. 71736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines I->eraseFromParent(); 71836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Update->eraseFromParent(); 71936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 72036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return NextI; 72136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 72236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 72336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesstatic bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, 72436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Offset) { 72536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (MI->getOpcode()) { 72636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 72736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 728dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::SUBXri: 72936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Negate the offset for a SUB instruction. 73036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Offset *= -1; 73136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FALLTHROUGH 732dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::ADDXri: 73336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Make sure it's a vanilla immediate operand, not a relocation or 73436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // anything else we can't handle. 73536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!MI->getOperand(2).isImm()) 73636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 73736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Watch out for 1 << 12 shifted value. 738dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) 73936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 74036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the instruction has the base register as source and dest and the 74136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // immediate will fit in a signed 9-bit integer, then we have a match. 74236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->getOperand(0).getReg() == BaseReg && 74336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MI->getOperand(1).getReg() == BaseReg && 74436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MI->getOperand(2).getImm() <= 255 && 74536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MI->getOperand(2).getImm() >= -256) { 74636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If we have a non-zero Offset, we check that it matches the amount 74736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // we're adding to the register. 74836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!Offset || Offset == MI->getOperand(2).getImm()) 74936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 75036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 75136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 75236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 75336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 75436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 75536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 756dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( 757dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator I, unsigned Limit, int Value) { 75836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator E = I->getParent()->end(); 75936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MemMI = I; 76036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MBBI = I; 76136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MachineFunction &MF = *MemMI->getParent()->getParent(); 76236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 76336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned DestReg = MemMI->getOperand(0).getReg(); 76436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned BaseReg = MemMI->getOperand(1).getReg(); 76536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Offset = MemMI->getOperand(2).getImm() * 76636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); 76736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 76836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the base register overlaps the destination register, we can't 76936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // merge the update. 77036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 77136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 77236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 77336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Scan forward looking for post-index opportunities. 77436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Updating instructions can't be formed if the memory insn already 77536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // has an offset other than the value we're looking for. 77636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Offset != Value) 77736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 77836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 77936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Track which registers have been modified and used between the first insn 78036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // (inclusive) and the second insn. 78136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BitVector ModifiedRegs, UsedRegs; 78236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ModifiedRegs.resize(TRI->getNumRegs()); 78336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UsedRegs.resize(TRI->getNumRegs()); 78436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 78536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned Count = 0; MBBI != E; ++MBBI) { 78636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MI = MBBI; 78736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Skip DBG_VALUE instructions. Otherwise debug info can affect the 78836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // optimization by changing how far we scan. 78936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->isDebugValue()) 79036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 79136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 79236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Now that we know this is a real instruction, count it. 79336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++Count; 79436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 79536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If we found a match, return it. 79636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isMatchingUpdateInsn(MI, BaseReg, Value)) 79736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MBBI; 79836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 79936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Update the status of what the instruction clobbered and used. 80036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 80136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 80236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Otherwise, if the base register is used or modified, we have no match, so 80336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // return early. 80436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 80536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 80636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 80736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 80836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 80936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 810dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesMachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( 811dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator I, unsigned Limit) { 81236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator B = I->getParent()->begin(); 81336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator E = I->getParent()->end(); 81436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MemMI = I; 81536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MBBI = I; 81636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MachineFunction &MF = *MemMI->getParent()->getParent(); 81736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 81836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned DestReg = MemMI->getOperand(0).getReg(); 81936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned BaseReg = MemMI->getOperand(1).getReg(); 82036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Offset = MemMI->getOperand(2).getImm(); 82136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); 82236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 82336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the load/store is the first instruction in the block, there's obviously 82436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // not any matching update. Ditto if the memory offset isn't zero. 82536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MBBI == B || Offset != 0) 82636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 82736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If the base register overlaps the destination register, we can't 82836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // merge the update. 82936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) 83036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 83136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 83236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Track which registers have been modified and used between the first insn 83336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // (inclusive) and the second insn. 83436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BitVector ModifiedRegs, UsedRegs; 83536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ModifiedRegs.resize(TRI->getNumRegs()); 83636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines UsedRegs.resize(TRI->getNumRegs()); 83736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines --MBBI; 83836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned Count = 0; MBBI != B; --MBBI) { 83936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MI = MBBI; 84036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Skip DBG_VALUE instructions. Otherwise debug info can affect the 84136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // optimization by changing how far we scan. 84236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->isDebugValue()) 84336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 84436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 84536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Now that we know this is a real instruction, count it. 84636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++Count; 84736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 84836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If we found a match, return it. 84936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) 85036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MBBI; 85136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 85236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Update the status of what the instruction clobbered and used. 85336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); 85436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 85536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Otherwise, if the base register is used or modified, we have no match, so 85636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // return early. 85736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) 85836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 85936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 86036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return E; 86136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 86236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 863dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { 86436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool Modified = false; 86536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Two tranformations to do here: 86636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 1) Find loads and stores that can be merged into a single load or store 86736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // pair instruction. 86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // e.g., 86936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x0, [x2] 87036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x1, [x2, #8] 87136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ; becomes 87236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldp x0, x1, [x2] 87336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 2) Find base register updates that can be merged into the load or store 87436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // as a base-reg writeback. 87536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // e.g., 87636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x0, [x2] 87736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // add x2, x2, #4 87836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ; becomes 87936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x0, [x2], #4 88036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 88136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 88236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MBBI != E;) { 88336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MI = MBBI; 88436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (MI->getOpcode()) { 88536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 88636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Just move on to the next instruction. 88736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 88836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 889dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRSui: 890dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRDui: 891dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRQui: 892dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRXui: 893dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRWui: 894dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRSui: 895dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRDui: 896dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRQui: 897dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRXui: 898dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRWui: 899ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDRSWui: 90036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // do the unscaled versions as well 901dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURSi: 902dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURDi: 903dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURQi: 904dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURWi: 905dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURXi: 906dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURSi: 907dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURDi: 908dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURQi: 909dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURWi: 910ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDURXi: 911ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AArch64::LDURSWi: { 91236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // If this is a volatile load/store, don't mess with it. 91336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MI->hasOrderedMemoryRef()) { 91436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Make sure this is a reg+imm (as opposed to an address reloc). 91836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!MI->getOperand(2).isImm()) { 91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 92136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 92236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Check if this load/store has a hint to avoid pair formation. 923dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. 92436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (TII->isLdStPairSuppressed(MI)) { 92536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 92636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 92736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 92836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Look ahead up to ScanLimit instructions for a pairable instruction. 929c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines bool MergeForward = false; 9304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar int SExtIdx = -1; 93136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator Paired = 9324c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); 93336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Paired != E) { 93436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge the loads into a pair. Keeping the iterator straight is a 93536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // pain, so we let the merge routine tell us what the next instruction 93636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // is after it's done mucking about. 9374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); 93836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 93936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Modified = true; 94036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumPairCreated; 94136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isUnscaledLdst(MI->getOpcode())) 94236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumUnscaledPairCreated; 94336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 94436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 94536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 94636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 94836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FIXME: Do the other instructions. 94936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 95036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 95136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 95236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 95336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MBBI != E;) { 95436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineInstr *MI = MBBI; 95536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Do update merging. It's simpler to keep this separate from the above 95636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // switch, though not strictly necessary. 95736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Opc = MI->getOpcode(); 95836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Opc) { 95936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Just move on to the next instruction. 96136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 963dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRSui: 964dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRDui: 965dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRQui: 966dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRXui: 967dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STRWui: 968dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRSui: 969dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRDui: 970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRQui: 971dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRXui: 972dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDRWui: 97336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // do the unscaled versions as well 974dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURSi: 975dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURDi: 976dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURQi: 977dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURWi: 978dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::STURXi: 979dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURSi: 980dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURDi: 981dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURQi: 982dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURWi: 983dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AArch64::LDURXi: { 98436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Make sure this is a reg+imm (as opposed to an address reloc). 98536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!MI->getOperand(2).isImm()) { 98636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 98736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 98836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 98936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Look ahead up to ScanLimit instructions for a mergable instruction. 99036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator Update = 99136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); 99236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Update != E) { 99336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge the update into the ld/st. 99436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MBBI = mergePostIdxUpdateInsn(MBBI, Update); 99536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Modified = true; 99636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumPostFolded; 99736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 99836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 99936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Don't know how to handle pre/post-index versions, so move to the next 100036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // instruction. 100136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isUnscaledLdst(Opc)) { 100236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 100336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 100436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 100536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 100636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Look back to try to find a pre-index instruction. For example, 100736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // add x0, x0, #8 100836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x1, [x0] 100936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // merged into: 101036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x1, [x0, #8]! 101136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); 101236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Update != E) { 101336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge the update into the ld/st. 101436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MBBI = mergePreIdxUpdateInsn(MBBI, Update); 101536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Modified = true; 101636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumPreFolded; 101736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 101836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 101936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 102036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Look forward to try to find a post-index instruction. For example, 102136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // ldr x1, [x0, #64] 102236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // add x0, x0, #64 102336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // merged into: 1024dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // ldr x1, [x0, #64]! 102536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 102636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // The immediate in the load/store is scaled by the size of the register 102736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // being loaded. The immediate in the add we're looking for, 102836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // however, is not, so adjust here. 102936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int Value = MI->getOperand(2).getImm() * 103036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) 103136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ->getSize(); 103236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); 103336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Update != E) { 103436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Merge the update into the ld/st. 103536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MBBI = mergePreIdxUpdateInsn(MBBI, Update); 103636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Modified = true; 103736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++NumPreFolded; 103836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 103936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 104036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 104136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Nothing found. Just move to the next instruction. 104236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ++MBBI; 104336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 104436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 104536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FIXME: Do the other instructions. 104636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 104736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 104836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 104936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return Modified; 105036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 105136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1052dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo()); 1054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TRI = Fn.getSubtarget().getRegisterInfo(); 105536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 105636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines bool Modified = false; 105736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (auto &MBB : Fn) 105836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Modified |= optimizeBlock(MBB); 105936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 106036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return Modified; 106136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 106236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 106336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep 106436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines// loads and stores near one another? 106536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 106636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 106736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines/// optimization pass. 1068dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesFunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { 1069dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return new AArch64LoadStoreOpt(); 107036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 1071