ARMLoadStoreOptimizer.cpp revision 90c579de5a383cee278acc3f7e7b9d0a656e6a35
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
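///
/// For example (an illustrative sketch only; registers and offsets are
/// arbitrary):
///
///   ldr r4, [r0]
///   ldr r5, [r0, #4]        =>        ldmia r0, {r4, r5, r6}
///   ldr r6, [r0, #8]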
56 57namespace { 58 struct ARMLoadStoreOpt : public MachineFunctionPass { 59 static char ID; 60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 61 62 const TargetInstrInfo *TII; 63 const TargetRegisterInfo *TRI; 64 ARMFunctionInfo *AFI; 65 RegScavenger *RS; 66 bool isThumb2; 67 68 virtual bool runOnMachineFunction(MachineFunction &Fn); 69 70 virtual const char *getPassName() const { 71 return "ARM load / store optimization pass"; 72 } 73 74 private: 75 struct MemOpQueueEntry { 76 int Offset; 77 unsigned Reg; 78 bool isKill; 79 unsigned Position; 80 MachineBasicBlock::iterator MBBI; 81 bool Merged; 82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 83 MachineBasicBlock::iterator i) 84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 85 }; 86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 87 typedef MemOpQueue::iterator MemOpQueueIter; 88 89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 90 int Offset, unsigned Base, bool BaseKill, int Opcode, 91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); 93 void MergeOpsUpdate(MachineBasicBlock &MBB, 94 MemOpQueue &MemOps, 95 unsigned memOpsBegin, 96 unsigned memOpsEnd, 97 unsigned insertAfter, 98 int Offset, 99 unsigned Base, 100 bool BaseKill, 101 int Opcode, 102 ARMCC::CondCodes Pred, 103 unsigned PredReg, 104 unsigned Scratch, 105 DebugLoc dl, 106 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 107 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 108 int Opcode, unsigned Size, 109 ARMCC::CondCodes Pred, unsigned PredReg, 110 unsigned Scratch, MemOpQueue &MemOps, 111 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 112 113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 115 MachineBasicBlock::iterator &MBBI); 116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 117 MachineBasicBlock::iterator MBBI, 118 const TargetInstrInfo *TII, 119 bool &Advance, 120 MachineBasicBlock::iterator &I); 121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 122 MachineBasicBlock::iterator MBBI, 123 bool &Advance, 124 MachineBasicBlock::iterator &I); 125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 127 }; 128 char ARMLoadStoreOpt::ID = 0; 129} 130 131static int getLoadStoreMultipleOpcode(int Opcode) { 132 switch (Opcode) { 133 case ARM::LDR: 134 ++NumLDMGened; 135 return ARM::LDM; 136 case ARM::STR: 137 ++NumSTMGened; 138 return ARM::STM; 139 case ARM::t2LDRi8: 140 case ARM::t2LDRi12: 141 ++NumLDMGened; 142 return ARM::t2LDM; 143 case ARM::t2STRi8: 144 case ARM::t2STRi12: 145 ++NumSTMGened; 146 return ARM::t2STM; 147 case ARM::VLDRS: 148 ++NumVLDMGened; 149 return ARM::VLDMS; 150 case ARM::VSTRS: 151 ++NumVSTMGened; 152 return ARM::VSTMS; 153 case ARM::VLDRD: 154 ++NumVLDMGened; 155 return ARM::VLDMD; 156 case ARM::VSTRD: 157 ++NumVSTMGened; 158 return ARM::VSTMD; 159 default: llvm_unreachable("Unhandled opcode!"); 160 } 161 return 0; 162} 163 164static bool isT2i32Load(unsigned Opc) { 165 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 166} 167 168static bool isi32Load(unsigned Opc) { 169 return Opc == ARM::LDR || isT2i32Load(Opc); 170} 171 172static bool isT2i32Store(unsigned Opc) { 173 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 174} 175 176static bool isi32Store(unsigned Opc) { 177 return Opc == ARM::STR || isT2i32Store(Opc); 178} 179 180/// MergeOps - 
Create and insert a LDM or STM with Base as base register and 181/// registers in Regs as the register operands that would be loaded / stored. 182/// It returns true if the transformation is done. 183bool 184ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 185 MachineBasicBlock::iterator MBBI, 186 int Offset, unsigned Base, bool BaseKill, 187 int Opcode, ARMCC::CondCodes Pred, 188 unsigned PredReg, unsigned Scratch, DebugLoc dl, 189 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 190 // Only a single register to load / store. Don't bother. 191 unsigned NumRegs = Regs.size(); 192 if (NumRegs <= 1) 193 return false; 194 195 ARM_AM::AMSubMode Mode = ARM_AM::ia; 196 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); 197 if (isAM4 && Offset == 4) { 198 if (isThumb2) 199 // Thumb2 does not support ldmib / stmib. 200 return false; 201 Mode = ARM_AM::ib; 202 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { 203 if (isThumb2) 204 // Thumb2 does not support ldmda / stmda. 205 return false; 206 Mode = ARM_AM::da; 207 } else if (isAM4 && Offset == -4 * (int)NumRegs) { 208 Mode = ARM_AM::db; 209 } else if (Offset != 0) { 210 // If starting offset isn't zero, insert a MI to materialize a new base. 211 // But only do so if it is cost effective, i.e. merging more than two 212 // loads / stores. 213 if (NumRegs <= 2) 214 return false; 215 216 unsigned NewBase; 217 if (isi32Load(Opcode)) 218 // If it is a load, then just use one of the destination register to 219 // use as the new base. 220 NewBase = Regs[NumRegs-1].first; 221 else { 222 // Use the scratch register to use as a new base. 223 NewBase = Scratch; 224 if (NewBase == 0) 225 return false; 226 } 227 int BaseOpc = !isThumb2 228 ? ARM::ADDri 229 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); 230 if (Offset < 0) { 231 BaseOpc = !isThumb2 232 ? ARM::SUBri 233 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); 234 Offset = - Offset; 235 } 236 int ImmedOffset = isThumb2 237 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); 238 if (ImmedOffset == -1) 239 // FIXME: Try t2ADDri12 or t2SUBri12? 240 return false; // Probably not worth it then. 241 242 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 243 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 244 .addImm(Pred).addReg(PredReg).addReg(0); 245 Base = NewBase; 246 BaseKill = true; // New base is always killed right its use. 247 } 248 249 bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD); 250 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 251 Opcode == ARM::VLDRD); 252 Opcode = getLoadStoreMultipleOpcode(Opcode); 253 MachineInstrBuilder MIB = (isAM4) 254 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 255 .addReg(Base, getKillRegState(BaseKill)) 256 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) 257 : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 258 .addReg(Base, getKillRegState(BaseKill)) 259 .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs)) 260 .addImm(Pred).addReg(PredReg); 261 for (unsigned i = 0; i != NumRegs; ++i) 262 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 263 | getKillRegState(Regs[i].second)); 264 265 return true; 266} 267 268// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 269// success. 
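// Note: kill flags are also maintained here. If an unmerged memop that stays
// before the insertion point kills a register being merged, the kill marker
// is transferred to the merged instruction; if such a register was killed by
// an already-merged instruction, the merge is abandoned.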
270void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 271 MemOpQueue &memOps, 272 unsigned memOpsBegin, unsigned memOpsEnd, 273 unsigned insertAfter, int Offset, 274 unsigned Base, bool BaseKill, 275 int Opcode, 276 ARMCC::CondCodes Pred, unsigned PredReg, 277 unsigned Scratch, 278 DebugLoc dl, 279 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 280 // First calculate which of the registers should be killed by the merged 281 // instruction. 282 const unsigned insertPos = memOps[insertAfter].Position; 283 284 SmallSet<unsigned, 4> UnavailRegs; 285 SmallSet<unsigned, 4> KilledRegs; 286 DenseMap<unsigned, unsigned> Killer; 287 for (unsigned i = 0; i < memOpsBegin; ++i) { 288 if (memOps[i].Position < insertPos && memOps[i].isKill) { 289 unsigned Reg = memOps[i].Reg; 290 if (memOps[i].Merged) 291 UnavailRegs.insert(Reg); 292 else { 293 KilledRegs.insert(Reg); 294 Killer[Reg] = i; 295 } 296 } 297 } 298 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { 299 if (memOps[i].Position < insertPos && memOps[i].isKill) { 300 unsigned Reg = memOps[i].Reg; 301 KilledRegs.insert(Reg); 302 Killer[Reg] = i; 303 } 304 } 305 306 SmallVector<std::pair<unsigned, bool>, 8> Regs; 307 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 308 unsigned Reg = memOps[i].Reg; 309 if (UnavailRegs.count(Reg)) 310 // Register is killed before and it's not easy / possible to update the 311 // kill marker on already merged instructions. Abort. 312 return; 313 314 // If we are inserting the merged operation after an unmerged operation that 315 // uses the same register, make sure to transfer any kill flag. 316 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 317 Regs.push_back(std::make_pair(Reg, isKill)); 318 } 319 320 // Try to do the merge. 321 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 322 ++Loc; 323 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 324 Pred, PredReg, Scratch, dl, Regs)) 325 return; 326 327 // Merge succeeded, update records. 328 Merges.push_back(prior(Loc)); 329 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 330 // Remove kill flags from any unmerged memops that come before insertPos. 331 if (Regs[i-memOpsBegin].second) { 332 unsigned Reg = Regs[i-memOpsBegin].first; 333 if (KilledRegs.count(Reg)) { 334 unsigned j = Killer[Reg]; 335 memOps[j].MBBI->getOperand(0).setIsKill(false); 336 } 337 } 338 MBB.erase(memOps[i].MBBI); 339 memOps[i].Merged = true; 340 } 341} 342 343/// MergeLDR_STR - Merge a number of load / store instructions into one or more 344/// load / store multiple instructions. 345void 346ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 347 unsigned Base, int Opcode, unsigned Size, 348 ARMCC::CondCodes Pred, unsigned PredReg, 349 unsigned Scratch, MemOpQueue &MemOps, 350 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 351 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); 352 int Offset = MemOps[SIndex].Offset; 353 int SOffset = Offset; 354 unsigned insertAfter = SIndex; 355 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 356 DebugLoc dl = Loc->getDebugLoc(); 357 const MachineOperand &PMO = Loc->getOperand(0); 358 unsigned PReg = PMO.getReg(); 359 unsigned PRegNum = PMO.isUndef() ? 
UINT_MAX 360 : ARMRegisterInfo::getRegisterNumbering(PReg); 361 unsigned Count = 1; 362 363 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 364 int NewOffset = MemOps[i].Offset; 365 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 366 unsigned Reg = MO.getReg(); 367 unsigned RegNum = MO.isUndef() ? UINT_MAX 368 : ARMRegisterInfo::getRegisterNumbering(Reg); 369 // AM4 - register numbers in ascending order. 370 // AM5 - consecutive register numbers in ascending order. 371 // Can only do up to 16 double-word registers per insn. 372 if (Reg != ARM::SP && 373 NewOffset == Offset + (int)Size && 374 ((isAM4 && RegNum > PRegNum) 375 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { 376 Offset += Size; 377 PRegNum = RegNum; 378 ++Count; 379 } else { 380 // Can't merge this in. Try merge the earlier ones first. 381 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 382 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 383 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 384 MemOps, Merges); 385 return; 386 } 387 388 if (MemOps[i].Position > MemOps[insertAfter].Position) 389 insertAfter = i; 390 } 391 392 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 393 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 394 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 395 return; 396} 397 398static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 399 unsigned Bytes, unsigned Limit, 400 ARMCC::CondCodes Pred, unsigned PredReg){ 401 unsigned MyPredReg = 0; 402 if (!MI) 403 return false; 404 if (MI->getOpcode() != ARM::t2SUBri && 405 MI->getOpcode() != ARM::t2SUBrSPi && 406 MI->getOpcode() != ARM::t2SUBrSPi12 && 407 MI->getOpcode() != ARM::tSUBspi && 408 MI->getOpcode() != ARM::SUBri) 409 return false; 410 411 // Make sure the offset fits in 8 bits. 412 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 413 return false; 414 415 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME 416 return (MI->getOperand(0).getReg() == Base && 417 MI->getOperand(1).getReg() == Base && 418 (MI->getOperand(2).getImm()*Scale) == Bytes && 419 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 420 MyPredReg == PredReg); 421} 422 423static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 424 unsigned Bytes, unsigned Limit, 425 ARMCC::CondCodes Pred, unsigned PredReg){ 426 unsigned MyPredReg = 0; 427 if (!MI) 428 return false; 429 if (MI->getOpcode() != ARM::t2ADDri && 430 MI->getOpcode() != ARM::t2ADDrSPi && 431 MI->getOpcode() != ARM::t2ADDrSPi12 && 432 MI->getOpcode() != ARM::tADDspi && 433 MI->getOpcode() != ARM::ADDri) 434 return false; 435 436 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 437 // Make sure the offset fits in 8 bits. 438 return false; 439 440 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 
4 : 1; // FIXME 441 return (MI->getOperand(0).getReg() == Base && 442 MI->getOperand(1).getReg() == Base && 443 (MI->getOperand(2).getImm()*Scale) == Bytes && 444 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 445 MyPredReg == PredReg); 446} 447 448static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 449 switch (MI->getOpcode()) { 450 default: return 0; 451 case ARM::LDR: 452 case ARM::STR: 453 case ARM::t2LDRi8: 454 case ARM::t2LDRi12: 455 case ARM::t2STRi8: 456 case ARM::t2STRi12: 457 case ARM::VLDRS: 458 case ARM::VSTRS: 459 return 4; 460 case ARM::VLDRD: 461 case ARM::VSTRD: 462 return 8; 463 case ARM::LDM: 464 case ARM::STM: 465 case ARM::t2LDM: 466 case ARM::t2STM: 467 return (MI->getNumOperands() - 4) * 4; 468 case ARM::VLDMS: 469 case ARM::VSTMS: 470 case ARM::VLDMD: 471 case ARM::VSTMD: 472 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; 473 } 474} 475 476static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { 477 switch (Opc) { 478 case ARM::LDM: return ARM::LDM_UPD; 479 case ARM::STM: return ARM::STM_UPD; 480 case ARM::t2LDM: return ARM::t2LDM_UPD; 481 case ARM::t2STM: return ARM::t2STM_UPD; 482 case ARM::VLDMS: return ARM::VLDMS_UPD; 483 case ARM::VLDMD: return ARM::VLDMD_UPD; 484 case ARM::VSTMS: return ARM::VSTMS_UPD; 485 case ARM::VSTMD: return ARM::VSTMD_UPD; 486 default: llvm_unreachable("Unhandled opcode!"); 487 } 488 return 0; 489} 490 491/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 492/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 493/// 494/// stmia rn, <ra, rb, rc> 495/// rn := rn + 4 * 3; 496/// => 497/// stmia rn!, <ra, rb, rc> 498/// 499/// rn := rn - 4 * 3; 500/// ldmia rn, <ra, rb, rc> 501/// => 502/// ldmdb rn!, <ra, rb, rc> 503bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 504 MachineBasicBlock::iterator MBBI, 505 bool &Advance, 506 MachineBasicBlock::iterator &I) { 507 MachineInstr *MI = MBBI; 508 unsigned Base = MI->getOperand(0).getReg(); 509 bool BaseKill = MI->getOperand(0).isKill(); 510 unsigned Bytes = getLSMultipleTransferSize(MI); 511 unsigned PredReg = 0; 512 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 513 int Opcode = MI->getOpcode(); 514 DebugLoc dl = MI->getDebugLoc(); 515 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || 516 Opcode == ARM::STM || Opcode == ARM::t2STM); 517 518 bool DoMerge = false; 519 ARM_AM::AMSubMode Mode = ARM_AM::ia; 520 unsigned Offset = 0; 521 522 if (isAM4) { 523 // Can't use an updating ld/st if the base register is also a dest 524 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 525 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { 526 if (MI->getOperand(i).getReg() == Base) 527 return false; 528 } 529 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 530 } else { 531 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. 532 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 533 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 534 } 535 536 // Try merging with the previous instruction. 
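  // For instance (illustrative), a preceding decrement of the base
  //   sub   rn, rn, #(4 * n)
  //   ldmia rn, {...}
  // can be folded into
  //   ldmdb rn!, {...}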
537 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 538 if (MBBI != BeginMBBI) { 539 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 540 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 541 --PrevMBBI; 542 if (isAM4) { 543 if (Mode == ARM_AM::ia && 544 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 545 DoMerge = true; 546 Mode = ARM_AM::db; 547 } else if (isAM4 && Mode == ARM_AM::ib && 548 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 549 DoMerge = true; 550 Mode = ARM_AM::da; 551 } 552 } else { 553 if (Mode == ARM_AM::ia && 554 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 555 Mode = ARM_AM::db; 556 DoMerge = true; 557 } 558 } 559 if (DoMerge) 560 MBB.erase(PrevMBBI); 561 } 562 563 // Try merging with the next instruction. 564 MachineBasicBlock::iterator EndMBBI = MBB.end(); 565 if (!DoMerge && MBBI != EndMBBI) { 566 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 567 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 568 ++NextMBBI; 569 if (isAM4) { 570 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 571 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 572 DoMerge = true; 573 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 574 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 575 DoMerge = true; 576 } 577 } else { 578 if (Mode == ARM_AM::ia && 579 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 580 DoMerge = true; 581 } 582 } 583 if (DoMerge) { 584 if (NextMBBI == I) { 585 Advance = true; 586 ++I; 587 } 588 MBB.erase(NextMBBI); 589 } 590 } 591 592 if (!DoMerge) 593 return false; 594 595 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); 596 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 597 .addReg(Base, getDefRegState(true)) // WB base register 598 .addReg(Base, getKillRegState(BaseKill)); 599 if (isAM4) { 600 // [t2]LDM_UPD, [t2]STM_UPD 601 MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) 602 .addImm(Pred).addReg(PredReg); 603 } else { 604 // VLDM[SD}_UPD, VSTM[SD]_UPD 605 MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) 606 .addImm(Pred).addReg(PredReg); 607 } 608 // Transfer the rest of operands. 609 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) 610 MIB.addOperand(MI->getOperand(OpNum)); 611 // Transfer memoperands. 
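  // (This preserves the original instruction's MachineMemOperands, e.g. the
  // alignment and aliasing information, on the new updating instruction.)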
612 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 613 614 MBB.erase(MBBI); 615 return true; 616} 617 618static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 619 switch (Opc) { 620 case ARM::LDR: return ARM::LDR_PRE; 621 case ARM::STR: return ARM::STR_PRE; 622 case ARM::VLDRS: return ARM::VLDMS_UPD; 623 case ARM::VLDRD: return ARM::VLDMD_UPD; 624 case ARM::VSTRS: return ARM::VSTMS_UPD; 625 case ARM::VSTRD: return ARM::VSTMD_UPD; 626 case ARM::t2LDRi8: 627 case ARM::t2LDRi12: 628 return ARM::t2LDR_PRE; 629 case ARM::t2STRi8: 630 case ARM::t2STRi12: 631 return ARM::t2STR_PRE; 632 default: llvm_unreachable("Unhandled opcode!"); 633 } 634 return 0; 635} 636 637static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 638 switch (Opc) { 639 case ARM::LDR: return ARM::LDR_POST; 640 case ARM::STR: return ARM::STR_POST; 641 case ARM::VLDRS: return ARM::VLDMS_UPD; 642 case ARM::VLDRD: return ARM::VLDMD_UPD; 643 case ARM::VSTRS: return ARM::VSTMS_UPD; 644 case ARM::VSTRD: return ARM::VSTMD_UPD; 645 case ARM::t2LDRi8: 646 case ARM::t2LDRi12: 647 return ARM::t2LDR_POST; 648 case ARM::t2STRi8: 649 case ARM::t2STRi12: 650 return ARM::t2STR_POST; 651 default: llvm_unreachable("Unhandled opcode!"); 652 } 653 return 0; 654} 655 656/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 657/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 658bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 659 MachineBasicBlock::iterator MBBI, 660 const TargetInstrInfo *TII, 661 bool &Advance, 662 MachineBasicBlock::iterator &I) { 663 MachineInstr *MI = MBBI; 664 unsigned Base = MI->getOperand(1).getReg(); 665 bool BaseKill = MI->getOperand(1).isKill(); 666 unsigned Bytes = getLSMultipleTransferSize(MI); 667 int Opcode = MI->getOpcode(); 668 DebugLoc dl = MI->getDebugLoc(); 669 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 670 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 671 bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR); 672 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) 673 return false; 674 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 675 return false; 676 if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) 677 if (MI->getOperand(2).getImm() != 0) 678 return false; 679 680 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 681 // Can't do the merge if the destination register is the same as the would-be 682 // writeback register. 683 if (isLd && MI->getOperand(0).getReg() == Base) 684 return false; 685 686 unsigned PredReg = 0; 687 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 688 bool DoMerge = false; 689 ARM_AM::AddrOpc AddSub = ARM_AM::add; 690 unsigned NewOpc = 0; 691 // AM2 - 12 bits, thumb2 - 8 bits. 692 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 693 694 // Try merging with the previous instruction. 695 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 696 if (MBBI != BeginMBBI) { 697 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 698 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 699 --PrevMBBI; 700 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 701 DoMerge = true; 702 AddSub = ARM_AM::sub; 703 } else if (!isAM5 && 704 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 705 DoMerge = true; 706 } 707 if (DoMerge) { 708 NewOpc = getPreIndexedLoadStoreOpcode(Opcode); 709 MBB.erase(PrevMBBI); 710 } 711 } 712 713 // Try merging with the next instruction. 
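  // For instance (illustrative), a trailing increment
  //   ldr r1, [r0]
  //   add r0, r0, #4
  // can be folded into a post-indexed load
  //   ldr r1, [r0], #4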
714 MachineBasicBlock::iterator EndMBBI = MBB.end(); 715 if (!DoMerge && MBBI != EndMBBI) { 716 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 717 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 718 ++NextMBBI; 719 if (!isAM5 && 720 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 721 DoMerge = true; 722 AddSub = ARM_AM::sub; 723 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 724 DoMerge = true; 725 } 726 if (DoMerge) { 727 NewOpc = getPostIndexedLoadStoreOpcode(Opcode); 728 if (NextMBBI == I) { 729 Advance = true; 730 ++I; 731 } 732 MBB.erase(NextMBBI); 733 } 734 } 735 736 if (!DoMerge) 737 return false; 738 739 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; 740 unsigned Offset = 0; 741 if (isAM5) 742 Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, 743 (isDPR ? 2 : 1)); 744 else if (isAM2) 745 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 746 else 747 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 748 749 if (isAM5) { 750 // VLDM[SD}_UPD, VSTM[SD]_UPD 751 MachineOperand &MO = MI->getOperand(0); 752 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 753 .addReg(Base, getDefRegState(true)) // WB base register 754 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 755 .addImm(Offset) 756 .addImm(Pred).addReg(PredReg) 757 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 758 getKillRegState(MO.isKill()))); 759 } else if (isLd) { 760 if (isAM2) 761 // LDR_PRE, LDR_POST, 762 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 763 .addReg(Base, RegState::Define) 764 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 765 else 766 // t2LDR_PRE, t2LDR_POST 767 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 768 .addReg(Base, RegState::Define) 769 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 770 } else { 771 MachineOperand &MO = MI->getOperand(0); 772 if (isAM2) 773 // STR_PRE, STR_POST 774 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 775 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 776 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 777 else 778 // t2STR_PRE, t2STR_POST 779 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 780 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 781 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 782 } 783 MBB.erase(MBBI); 784 785 return true; 786} 787 788/// isMemoryOp - Returns true if instruction is a memory operations (that this 789/// pass is capable of operating on). 790static bool isMemoryOp(const MachineInstr *MI) { 791 // When no memory operands are present, conservatively assume unaligned, 792 // volatile, unfoldable. 793 if (!MI->hasOneMemOperand()) 794 return false; 795 796 const MachineMemOperand *MMO = *MI->memoperands_begin(); 797 798 // Don't touch volatile memory accesses - we may be changing their order. 799 if (MMO->isVolatile()) 800 return false; 801 802 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 803 // not. 804 if (MMO->getAlignment() < 4) 805 return false; 806 807 // str <undef> could probably be eliminated entirely, but for now we just want 808 // to avoid making a mess of it. 809 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 810 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 811 MI->getOperand(0).isUndef()) 812 return false; 813 814 // Likewise don't mess with references to undefined addresses. 
815 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 816 MI->getOperand(1).isUndef()) 817 return false; 818 819 int Opcode = MI->getOpcode(); 820 switch (Opcode) { 821 default: break; 822 case ARM::LDR: 823 case ARM::STR: 824 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; 825 case ARM::VLDRS: 826 case ARM::VSTRS: 827 return MI->getOperand(1).isReg(); 828 case ARM::VLDRD: 829 case ARM::VSTRD: 830 return MI->getOperand(1).isReg(); 831 case ARM::t2LDRi8: 832 case ARM::t2LDRi12: 833 case ARM::t2STRi8: 834 case ARM::t2STRi12: 835 return MI->getOperand(1).isReg(); 836 } 837 return false; 838} 839 840/// AdvanceRS - Advance register scavenger to just before the earliest memory 841/// op that is being merged. 842void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 843 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 844 unsigned Position = MemOps[0].Position; 845 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 846 if (MemOps[i].Position < Position) { 847 Position = MemOps[i].Position; 848 Loc = MemOps[i].MBBI; 849 } 850 } 851 852 if (Loc != MBB.begin()) 853 RS->forward(prior(Loc)); 854} 855 856static int getMemoryOpOffset(const MachineInstr *MI) { 857 int Opcode = MI->getOpcode(); 858 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; 859 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 860 unsigned NumOperands = MI->getDesc().getNumOperands(); 861 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 862 863 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 864 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 865 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) 866 return OffField; 867 868 int Offset = isAM2 869 ? ARM_AM::getAM2Offset(OffField) 870 : (isAM3 ? 
ARM_AM::getAM3Offset(OffField) 871 : ARM_AM::getAM5Offset(OffField) * 4); 872 if (isAM2) { 873 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) 874 Offset = -Offset; 875 } else if (isAM3) { 876 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) 877 Offset = -Offset; 878 } else { 879 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) 880 Offset = -Offset; 881 } 882 return Offset; 883} 884 885static void InsertLDR_STR(MachineBasicBlock &MBB, 886 MachineBasicBlock::iterator &MBBI, 887 int OffImm, bool isDef, 888 DebugLoc dl, unsigned NewOpc, 889 unsigned Reg, bool RegDeadKill, bool RegUndef, 890 unsigned BaseReg, bool BaseKill, bool BaseUndef, 891 unsigned OffReg, bool OffKill, bool OffUndef, 892 ARMCC::CondCodes Pred, unsigned PredReg, 893 const TargetInstrInfo *TII, bool isT2) { 894 int Offset = OffImm; 895 if (!isT2) { 896 if (OffImm < 0) 897 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); 898 else 899 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); 900 } 901 if (isDef) { 902 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 903 TII->get(NewOpc)) 904 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 905 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 906 if (!isT2) 907 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 908 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 909 } else { 910 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 911 TII->get(NewOpc)) 912 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 913 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 914 if (!isT2) 915 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 916 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 917 } 918} 919 920bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 921 MachineBasicBlock::iterator &MBBI) { 922 MachineInstr *MI = &*MBBI; 923 unsigned Opcode = MI->getOpcode(); 924 if (Opcode == ARM::LDRD || Opcode == ARM::STRD || 925 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { 926 unsigned EvenReg = MI->getOperand(0).getReg(); 927 unsigned OddReg = MI->getOperand(1).getReg(); 928 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 929 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 930 if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) 931 return false; 932 933 MachineBasicBlock::iterator NewBBI = MBBI; 934 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 935 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 936 bool EvenDeadKill = isLd ? 937 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 938 bool EvenUndef = MI->getOperand(0).isUndef(); 939 bool OddDeadKill = isLd ? 940 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 941 bool OddUndef = MI->getOperand(1).isUndef(); 942 const MachineOperand &BaseOp = MI->getOperand(2); 943 unsigned BaseReg = BaseOp.getReg(); 944 bool BaseKill = BaseOp.isKill(); 945 bool BaseUndef = BaseOp.isUndef(); 946 unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg(); 947 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 948 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 949 int OffImm = getMemoryOpOffset(MI); 950 unsigned PredReg = 0; 951 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 952 953 if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { 954 // Ascending register numbers and no offset. It's safe to change it to a 955 // ldm or stm. 
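      // e.g. (illustrative) an ldrd with an illegal (odd-first) register pair
      //   ldrd r1, r2, [r0]   =>   ldmia r0, {r1, r2}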
956 unsigned NewOpc = (isLd) 957 ? (isT2 ? ARM::t2LDM : ARM::LDM) 958 : (isT2 ? ARM::t2STM : ARM::STM); 959 if (isLd) { 960 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 961 .addReg(BaseReg, getKillRegState(BaseKill)) 962 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) 963 .addImm(Pred).addReg(PredReg) 964 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 965 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 966 ++NumLDRD2LDM; 967 } else { 968 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 969 .addReg(BaseReg, getKillRegState(BaseKill)) 970 .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) 971 .addImm(Pred).addReg(PredReg) 972 .addReg(EvenReg, 973 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 974 .addReg(OddReg, 975 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 976 ++NumSTRD2STM; 977 } 978 NewBBI = llvm::prior(MBBI); 979 } else { 980 // Split into two instructions. 981 assert((!isT2 || !OffReg) && 982 "Thumb2 ldrd / strd does not encode offset register!"); 983 unsigned NewOpc = (isLd) 984 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR) 985 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR); 986 DebugLoc dl = MBBI->getDebugLoc(); 987 // If this is a load and base register is killed, it may have been 988 // re-defed by the load, make sure the first load does not clobber it. 989 if (isLd && 990 (BaseKill || OffKill) && 991 (TRI->regsOverlap(EvenReg, BaseReg) || 992 (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { 993 assert(!TRI->regsOverlap(OddReg, BaseReg) && 994 (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); 995 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, 996 OddReg, OddDeadKill, false, 997 BaseReg, false, BaseUndef, OffReg, false, OffUndef, 998 Pred, PredReg, TII, isT2); 999 NewBBI = llvm::prior(MBBI); 1000 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1001 EvenReg, EvenDeadKill, false, 1002 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, 1003 Pred, PredReg, TII, isT2); 1004 } else { 1005 if (OddReg == EvenReg && EvenDeadKill) { 1006 // If the two source operands are the same, the kill marker is 1007 // probably on the first one. e.g. 1008 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1009 EvenDeadKill = false; 1010 OddDeadKill = true; 1011 } 1012 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1013 EvenReg, EvenDeadKill, EvenUndef, 1014 BaseReg, false, BaseUndef, OffReg, false, OffUndef, 1015 Pred, PredReg, TII, isT2); 1016 NewBBI = llvm::prior(MBBI); 1017 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, 1018 OddReg, OddDeadKill, OddUndef, 1019 BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, 1020 Pred, PredReg, TII, isT2); 1021 } 1022 if (isLd) 1023 ++NumLDRD2LDR; 1024 else 1025 ++NumSTRD2STR; 1026 } 1027 1028 MBB.erase(MI); 1029 MBBI = NewBBI; 1030 return true; 1031 } 1032 return false; 1033} 1034 1035/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 1036/// ops of the same base and incrementing offset into LDM / STM ops. 
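/// Loads / stores are gathered into a queue for as long as they share the
/// same base register, opcode and predicate; the queue is flushed and merged
/// when the chain is broken, for example when the base register is redefined
/// by one of the loads.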
1037bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1038 unsigned NumMerges = 0; 1039 unsigned NumMemOps = 0; 1040 MemOpQueue MemOps; 1041 unsigned CurrBase = 0; 1042 int CurrOpc = -1; 1043 unsigned CurrSize = 0; 1044 ARMCC::CondCodes CurrPred = ARMCC::AL; 1045 unsigned CurrPredReg = 0; 1046 unsigned Position = 0; 1047 SmallVector<MachineBasicBlock::iterator,4> Merges; 1048 1049 RS->enterBasicBlock(&MBB); 1050 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1051 while (MBBI != E) { 1052 if (FixInvalidRegPairOp(MBB, MBBI)) 1053 continue; 1054 1055 bool Advance = false; 1056 bool TryMerge = false; 1057 bool Clobber = false; 1058 1059 bool isMemOp = isMemoryOp(MBBI); 1060 if (isMemOp) { 1061 int Opcode = MBBI->getOpcode(); 1062 unsigned Size = getLSMultipleTransferSize(MBBI); 1063 const MachineOperand &MO = MBBI->getOperand(0); 1064 unsigned Reg = MO.getReg(); 1065 bool isKill = MO.isDef() ? false : MO.isKill(); 1066 unsigned Base = MBBI->getOperand(1).getReg(); 1067 unsigned PredReg = 0; 1068 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); 1069 int Offset = getMemoryOpOffset(MBBI); 1070 // Watch out for: 1071 // r4 := ldr [r5] 1072 // r5 := ldr [r5, #4] 1073 // r6 := ldr [r5, #8] 1074 // 1075 // The second ldr has effectively broken the chain even though it 1076 // looks like the later ldr(s) use the same base register. Try to 1077 // merge the ldr's so far, including this one. But don't try to 1078 // combine the following ldr(s). 1079 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); 1080 if (CurrBase == 0 && !Clobber) { 1081 // Start of a new chain. 1082 CurrBase = Base; 1083 CurrOpc = Opcode; 1084 CurrSize = Size; 1085 CurrPred = Pred; 1086 CurrPredReg = PredReg; 1087 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); 1088 ++NumMemOps; 1089 Advance = true; 1090 } else { 1091 if (Clobber) { 1092 TryMerge = true; 1093 Advance = true; 1094 } 1095 1096 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1097 // No need to match PredReg. 1098 // Continue adding to the queue. 1099 if (Offset > MemOps.back().Offset) { 1100 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, 1101 Position, MBBI)); 1102 ++NumMemOps; 1103 Advance = true; 1104 } else { 1105 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); 1106 I != E; ++I) { 1107 if (Offset < I->Offset) { 1108 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, 1109 Position, MBBI)); 1110 ++NumMemOps; 1111 Advance = true; 1112 break; 1113 } else if (Offset == I->Offset) { 1114 // Collision! This can't be merged! 1115 break; 1116 } 1117 } 1118 } 1119 } 1120 } 1121 } 1122 1123 if (MBBI->isDebugValue()) { 1124 ++MBBI; 1125 if (MBBI == E) 1126 // Reach the end of the block, try merging the memory instructions. 1127 TryMerge = true; 1128 } else if (Advance) { 1129 ++Position; 1130 ++MBBI; 1131 if (MBBI == E) 1132 // Reach the end of the block, try merging the memory instructions. 1133 TryMerge = true; 1134 } else 1135 TryMerge = true; 1136 1137 if (TryMerge) { 1138 if (NumMemOps > 1) { 1139 // Try to find a free register to use as a new base in case it's needed. 1140 // First advance to the instruction just before the start of the chain. 1141 AdvanceRS(MBB, MemOps); 1142 // Find a scratch register. 1143 unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass); 1144 // Process the load / store instructions. 1145 RS->forward(prior(MBBI)); 1146 1147 // Merge ops. 
1148 Merges.clear(); 1149 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, 1150 CurrPred, CurrPredReg, Scratch, MemOps, Merges); 1151 1152 // Try folding preceeding/trailing base inc/dec into the generated 1153 // LDM/STM ops. 1154 for (unsigned i = 0, e = Merges.size(); i < e; ++i) 1155 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) 1156 ++NumMerges; 1157 NumMerges += Merges.size(); 1158 1159 // Try folding preceeding/trailing base inc/dec into those load/store 1160 // that were not merged to form LDM/STM ops. 1161 for (unsigned i = 0; i != NumMemOps; ++i) 1162 if (!MemOps[i].Merged) 1163 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) 1164 ++NumMerges; 1165 1166 // RS may be pointing to an instruction that's deleted. 1167 RS->skipTo(prior(MBBI)); 1168 } else if (NumMemOps == 1) { 1169 // Try folding preceeding/trailing base inc/dec into the single 1170 // load/store. 1171 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { 1172 ++NumMerges; 1173 RS->forward(prior(MBBI)); 1174 } 1175 } 1176 1177 CurrBase = 0; 1178 CurrOpc = -1; 1179 CurrSize = 0; 1180 CurrPred = ARMCC::AL; 1181 CurrPredReg = 0; 1182 if (NumMemOps) { 1183 MemOps.clear(); 1184 NumMemOps = 0; 1185 } 1186 1187 // If iterator hasn't been advanced and this is not a memory op, skip it. 1188 // It can't start a new chain anyway. 1189 if (!Advance && !isMemOp && MBBI != E) { 1190 ++Position; 1191 ++MBBI; 1192 } 1193 } 1194 } 1195 return NumMerges > 0; 1196} 1197 1198namespace { 1199 struct OffsetCompare { 1200 bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { 1201 int LOffset = getMemoryOpOffset(LHS); 1202 int ROffset = getMemoryOpOffset(RHS); 1203 assert(LHS == RHS || LOffset != ROffset); 1204 return LOffset > ROffset; 1205 } 1206 }; 1207} 1208 1209/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops 1210/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it 1211/// directly restore the value of LR into pc. 1212/// ldmfd sp!, {..., lr} 1213/// bx lr 1214/// or 1215/// ldmfd sp!, {..., lr} 1216/// mov pc, lr 1217/// => 1218/// ldmfd sp!, {..., pc} 1219bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1220 if (MBB.empty()) return false; 1221 1222 MachineBasicBlock::iterator MBBI = prior(MBB.end()); 1223 if (MBBI != MBB.begin() && 1224 (MBBI->getOpcode() == ARM::BX_RET || 1225 MBBI->getOpcode() == ARM::tBX_RET || 1226 MBBI->getOpcode() == ARM::MOVPCLR)) { 1227 MachineInstr *PrevMI = prior(MBBI); 1228 if (PrevMI->getOpcode() == ARM::LDM_UPD || 1229 PrevMI->getOpcode() == ARM::t2LDM_UPD) { 1230 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1231 if (MO.getReg() != ARM::LR) 1232 return false; 1233 unsigned NewOpc = isThumb2 ? 
ARM::t2LDM_RET : ARM::LDM_RET; 1234 PrevMI->setDesc(TII->get(NewOpc)); 1235 MO.setReg(ARM::PC); 1236 MBB.erase(MBBI); 1237 return true; 1238 } 1239 } 1240 return false; 1241} 1242 1243bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1244 const TargetMachine &TM = Fn.getTarget(); 1245 AFI = Fn.getInfo<ARMFunctionInfo>(); 1246 TII = TM.getInstrInfo(); 1247 TRI = TM.getRegisterInfo(); 1248 RS = new RegScavenger(); 1249 isThumb2 = AFI->isThumb2Function(); 1250 1251 bool Modified = false; 1252 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1253 ++MFI) { 1254 MachineBasicBlock &MBB = *MFI; 1255 Modified |= LoadStoreMultipleOpti(MBB); 1256 Modified |= MergeReturnIntoLDM(MBB); 1257 } 1258 1259 delete RS; 1260 return Modified; 1261} 1262 1263 1264/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move 1265/// load / stores from consecutive locations close to make it more 1266/// likely they will be combined later. 1267 1268namespace { 1269 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1270 static char ID; 1271 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1272 1273 const TargetData *TD; 1274 const TargetInstrInfo *TII; 1275 const TargetRegisterInfo *TRI; 1276 const ARMSubtarget *STI; 1277 MachineRegisterInfo *MRI; 1278 MachineFunction *MF; 1279 1280 virtual bool runOnMachineFunction(MachineFunction &Fn); 1281 1282 virtual const char *getPassName() const { 1283 return "ARM pre- register allocation load / store optimization pass"; 1284 } 1285 1286 private: 1287 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1288 unsigned &NewOpc, unsigned &EvenReg, 1289 unsigned &OddReg, unsigned &BaseReg, 1290 unsigned &OffReg, int &Offset, 1291 unsigned &PredReg, ARMCC::CondCodes &Pred, 1292 bool &isT2); 1293 bool RescheduleOps(MachineBasicBlock *MBB, 1294 SmallVector<MachineInstr*, 4> &Ops, 1295 unsigned Base, bool isLd, 1296 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1297 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1298 }; 1299 char ARMPreAllocLoadStoreOpt::ID = 0; 1300} 1301 1302bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1303 TD = Fn.getTarget().getTargetData(); 1304 TII = Fn.getTarget().getInstrInfo(); 1305 TRI = Fn.getTarget().getRegisterInfo(); 1306 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 1307 MRI = &Fn.getRegInfo(); 1308 MF = &Fn; 1309 1310 bool Modified = false; 1311 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1312 ++MFI) 1313 Modified |= RescheduleLoadStoreInstrs(MFI); 1314 1315 return Modified; 1316} 1317 1318static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1319 MachineBasicBlock::iterator I, 1320 MachineBasicBlock::iterator E, 1321 SmallPtrSet<MachineInstr*, 4> &MemOps, 1322 SmallSet<unsigned, 4> &MemRegs, 1323 const TargetRegisterInfo *TRI) { 1324 // Are there stores / loads / calls between them? 1325 // FIXME: This is overly conservative. We should make use of alias information 1326 // some day. 1327 SmallSet<unsigned, 4> AddedRegPressure; 1328 while (++I != E) { 1329 if (I->isDebugValue() || MemOps.count(&*I)) 1330 continue; 1331 const TargetInstrDesc &TID = I->getDesc(); 1332 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) 1333 return false; 1334 if (isLd && TID.mayStore()) 1335 return false; 1336 if (!isLd) { 1337 if (TID.mayLoad()) 1338 return false; 1339 // It's not safe to move the first 'str' down. 
1340 // str r1, [r0] 1341 // strh r5, [r0] 1342 // str r4, [r0, #+4] 1343 if (TID.mayStore()) 1344 return false; 1345 } 1346 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1347 MachineOperand &MO = I->getOperand(j); 1348 if (!MO.isReg()) 1349 continue; 1350 unsigned Reg = MO.getReg(); 1351 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1352 return false; 1353 if (Reg != Base && !MemRegs.count(Reg)) 1354 AddedRegPressure.insert(Reg); 1355 } 1356 } 1357 1358 // Estimate register pressure increase due to the transformation. 1359 if (MemRegs.size() <= 4) 1360 // Ok if we are moving small number of instructions. 1361 return true; 1362 return AddedRegPressure.size() <= MemRegs.size() * 2; 1363} 1364 1365bool 1366ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1367 DebugLoc &dl, 1368 unsigned &NewOpc, unsigned &EvenReg, 1369 unsigned &OddReg, unsigned &BaseReg, 1370 unsigned &OffReg, int &Offset, 1371 unsigned &PredReg, 1372 ARMCC::CondCodes &Pred, 1373 bool &isT2) { 1374 // Make sure we're allowed to generate LDRD/STRD. 1375 if (!STI->hasV5TEOps()) 1376 return false; 1377 1378 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1379 unsigned Scale = 1; 1380 unsigned Opcode = Op0->getOpcode(); 1381 if (Opcode == ARM::LDR) 1382 NewOpc = ARM::LDRD; 1383 else if (Opcode == ARM::STR) 1384 NewOpc = ARM::STRD; 1385 else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1386 NewOpc = ARM::t2LDRDi8; 1387 Scale = 4; 1388 isT2 = true; 1389 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1390 NewOpc = ARM::t2STRDi8; 1391 Scale = 4; 1392 isT2 = true; 1393 } else 1394 return false; 1395 1396 // Make sure the offset registers match. 1397 if (!isT2 && 1398 (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg())) 1399 return false; 1400 1401 // Must sure the base address satisfies i64 ld / st alignment requirement. 1402 if (!Op0->hasOneMemOperand() || 1403 !(*Op0->memoperands_begin())->getValue() || 1404 (*Op0->memoperands_begin())->isVolatile()) 1405 return false; 1406 1407 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1408 const Function *Func = MF->getFunction(); 1409 unsigned ReqAlign = STI->hasV6Ops() 1410 ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) 1411 : 8; // Pre-v6 need 8-byte align 1412 if (Align < ReqAlign) 1413 return false; 1414 1415 // Then make sure the immediate offset fits. 1416 int OffImm = getMemoryOpOffset(Op0); 1417 if (isT2) { 1418 if (OffImm < 0) { 1419 if (OffImm < -255) 1420 // Can't fall back to t2LDRi8 / t2STRi8. 
1421 return false; 1422 } else { 1423 int Limit = (1 << 8) * Scale; 1424 if (OffImm >= Limit || (OffImm & (Scale-1))) 1425 return false; 1426 } 1427 Offset = OffImm; 1428 } else { 1429 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1430 if (OffImm < 0) { 1431 AddSub = ARM_AM::sub; 1432 OffImm = - OffImm; 1433 } 1434 int Limit = (1 << 8) * Scale; 1435 if (OffImm >= Limit || (OffImm & (Scale-1))) 1436 return false; 1437 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 1438 } 1439 EvenReg = Op0->getOperand(0).getReg(); 1440 OddReg = Op1->getOperand(0).getReg(); 1441 if (EvenReg == OddReg) 1442 return false; 1443 BaseReg = Op0->getOperand(1).getReg(); 1444 if (!isT2) 1445 OffReg = Op0->getOperand(2).getReg(); 1446 Pred = llvm::getInstrPredicate(Op0, PredReg); 1447 dl = Op0->getDebugLoc(); 1448 return true; 1449} 1450 1451bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 1452 SmallVector<MachineInstr*, 4> &Ops, 1453 unsigned Base, bool isLd, 1454 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 1455 bool RetVal = false; 1456 1457 // Sort by offset (in reverse order). 1458 std::sort(Ops.begin(), Ops.end(), OffsetCompare()); 1459 1460 // The loads / stores of the same base are in order. Scan them from first to 1461 // last and check for the following: 1462 // 1. Any def of base. 1463 // 2. Any gaps. 1464 while (Ops.size() > 1) { 1465 unsigned FirstLoc = ~0U; 1466 unsigned LastLoc = 0; 1467 MachineInstr *FirstOp = 0; 1468 MachineInstr *LastOp = 0; 1469 int LastOffset = 0; 1470 unsigned LastOpcode = 0; 1471 unsigned LastBytes = 0; 1472 unsigned NumMove = 0; 1473 for (int i = Ops.size() - 1; i >= 0; --i) { 1474 MachineInstr *Op = Ops[i]; 1475 unsigned Loc = MI2LocMap[Op]; 1476 if (Loc <= FirstLoc) { 1477 FirstLoc = Loc; 1478 FirstOp = Op; 1479 } 1480 if (Loc >= LastLoc) { 1481 LastLoc = Loc; 1482 LastOp = Op; 1483 } 1484 1485 unsigned Opcode = Op->getOpcode(); 1486 if (LastOpcode && Opcode != LastOpcode) 1487 break; 1488 1489 int Offset = getMemoryOpOffset(Op); 1490 unsigned Bytes = getLSMultipleTransferSize(Op); 1491 if (LastBytes) { 1492 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 1493 break; 1494 } 1495 LastOffset = Offset; 1496 LastBytes = Bytes; 1497 LastOpcode = Opcode; 1498 if (++NumMove == 8) // FIXME: Tune this limit. 1499 break; 1500 } 1501 1502 if (NumMove <= 1) 1503 Ops.pop_back(); 1504 else { 1505 SmallPtrSet<MachineInstr*, 4> MemOps; 1506 SmallSet<unsigned, 4> MemRegs; 1507 for (int i = NumMove-1; i >= 0; --i) { 1508 MemOps.insert(Ops[i]); 1509 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 1510 } 1511 1512 // Be conservative, if the instructions are too far apart, don't 1513 // move them. We want to limit the increase of register pressure. 1514 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 1515 if (DoMove) 1516 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 1517 MemOps, MemRegs, TRI); 1518 if (!DoMove) { 1519 for (unsigned i = 0; i != NumMove; ++i) 1520 Ops.pop_back(); 1521 } else { 1522 // This is the new location for the loads / stores. 1523 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 1524 while (InsertPos != MBB->end() 1525 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 1526 ++InsertPos; 1527 1528 // If we are moving a pair of loads / stores, see if it makes sense 1529 // to try to allocate a pair of registers that can form register pairs. 
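        // (ldrd / strd need an even / odd register pair, so register
        // allocation hints are added below to encourage such an assignment.)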
1530 MachineInstr *Op0 = Ops.back(); 1531 MachineInstr *Op1 = Ops[Ops.size()-2]; 1532 unsigned EvenReg = 0, OddReg = 0; 1533 unsigned BaseReg = 0, OffReg = 0, PredReg = 0; 1534 ARMCC::CondCodes Pred = ARMCC::AL; 1535 bool isT2 = false; 1536 unsigned NewOpc = 0; 1537 int Offset = 0; 1538 DebugLoc dl; 1539 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 1540 EvenReg, OddReg, BaseReg, OffReg, 1541 Offset, PredReg, Pred, isT2)) { 1542 Ops.pop_back(); 1543 Ops.pop_back(); 1544 1545 // Form the pair instruction. 1546 if (isLd) { 1547 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1548 dl, TII->get(NewOpc)) 1549 .addReg(EvenReg, RegState::Define) 1550 .addReg(OddReg, RegState::Define) 1551 .addReg(BaseReg); 1552 if (!isT2) 1553 MIB.addReg(OffReg); 1554 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1555 ++NumLDRDFormed; 1556 } else { 1557 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1558 dl, TII->get(NewOpc)) 1559 .addReg(EvenReg) 1560 .addReg(OddReg) 1561 .addReg(BaseReg); 1562 if (!isT2) 1563 MIB.addReg(OffReg); 1564 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1565 ++NumSTRDFormed; 1566 } 1567 MBB->erase(Op0); 1568 MBB->erase(Op1); 1569 1570 // Add register allocation hints to form register pairs. 1571 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 1572 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 1573 } else { 1574 for (unsigned i = 0; i != NumMove; ++i) { 1575 MachineInstr *Op = Ops.back(); 1576 Ops.pop_back(); 1577 MBB->splice(InsertPos, MBB, Op); 1578 } 1579 } 1580 1581 NumLdStMoved += NumMove; 1582 RetVal = true; 1583 } 1584 } 1585 } 1586 1587 return RetVal; 1588} 1589 1590bool 1591ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 1592 bool RetVal = false; 1593 1594 DenseMap<MachineInstr*, unsigned> MI2LocMap; 1595 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 1596 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 1597 SmallVector<unsigned, 4> LdBases; 1598 SmallVector<unsigned, 4> StBases; 1599 1600 unsigned Loc = 0; 1601 MachineBasicBlock::iterator MBBI = MBB->begin(); 1602 MachineBasicBlock::iterator E = MBB->end(); 1603 while (MBBI != E) { 1604 for (; MBBI != E; ++MBBI) { 1605 MachineInstr *MI = MBBI; 1606 const TargetInstrDesc &TID = MI->getDesc(); 1607 if (TID.isCall() || TID.isTerminator()) { 1608 // Stop at barriers. 
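        // Calls and terminators act as scheduling barriers; loads / stores
        // are only rescheduled within the region between them.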
1609 ++MBBI; 1610 break; 1611 } 1612 1613 if (!MI->isDebugValue()) 1614 MI2LocMap[MI] = ++Loc; 1615 1616 if (!isMemoryOp(MI)) 1617 continue; 1618 unsigned PredReg = 0; 1619 if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) 1620 continue; 1621 1622 int Opc = MI->getOpcode(); 1623 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 1624 unsigned Base = MI->getOperand(1).getReg(); 1625 int Offset = getMemoryOpOffset(MI); 1626 1627 bool StopHere = false; 1628 if (isLd) { 1629 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1630 Base2LdsMap.find(Base); 1631 if (BI != Base2LdsMap.end()) { 1632 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1633 if (Offset == getMemoryOpOffset(BI->second[i])) { 1634 StopHere = true; 1635 break; 1636 } 1637 } 1638 if (!StopHere) 1639 BI->second.push_back(MI); 1640 } else { 1641 SmallVector<MachineInstr*, 4> MIs; 1642 MIs.push_back(MI); 1643 Base2LdsMap[Base] = MIs; 1644 LdBases.push_back(Base); 1645 } 1646 } else { 1647 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1648 Base2StsMap.find(Base); 1649 if (BI != Base2StsMap.end()) { 1650 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1651 if (Offset == getMemoryOpOffset(BI->second[i])) { 1652 StopHere = true; 1653 break; 1654 } 1655 } 1656 if (!StopHere) 1657 BI->second.push_back(MI); 1658 } else { 1659 SmallVector<MachineInstr*, 4> MIs; 1660 MIs.push_back(MI); 1661 Base2StsMap[Base] = MIs; 1662 StBases.push_back(Base); 1663 } 1664 } 1665 1666 if (StopHere) { 1667 // Found a duplicate (a base+offset combination that's seen earlier). 1668 // Backtrack. 1669 --Loc; 1670 break; 1671 } 1672 } 1673 1674 // Re-schedule loads. 1675 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 1676 unsigned Base = LdBases[i]; 1677 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; 1678 if (Lds.size() > 1) 1679 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 1680 } 1681 1682 // Re-schedule stores. 1683 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 1684 unsigned Base = StBases[i]; 1685 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; 1686 if (Sts.size() > 1) 1687 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 1688 } 1689 1690 if (MBBI != E) { 1691 Base2LdsMap.clear(); 1692 Base2StsMap.clear(); 1693 LdBases.clear(); 1694 StBases.clear(); 1695 } 1696 } 1697 1698 return RetVal; 1699} 1700 1701 1702/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 1703/// optimization pass. 1704FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 1705 if (PreAlloc) 1706 return new ARMPreAllocLoadStoreOpt(); 1707 return new ARMLoadStoreOpt(); 1708} 1709