1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#include "ARM.h" 16#include "ARMBaseInstrInfo.h" 17#include "ARMBaseRegisterInfo.h" 18#include "ARMISelLowering.h" 19#include "ARMMachineFunctionInfo.h" 20#include "ARMSubtarget.h" 21#include "MCTargetDesc/ARMAddressingModes.h" 22#include "ThumbRegisterInfo.h" 23#include "llvm/ADT/DenseMap.h" 24#include "llvm/ADT/STLExtras.h" 25#include "llvm/ADT/SmallPtrSet.h" 26#include "llvm/ADT/SmallSet.h" 27#include "llvm/ADT/SmallVector.h" 28#include "llvm/ADT/Statistic.h" 29#include "llvm/CodeGen/MachineBasicBlock.h" 30#include "llvm/CodeGen/MachineFunctionPass.h" 31#include "llvm/CodeGen/MachineInstr.h" 32#include "llvm/CodeGen/MachineInstrBuilder.h" 33#include "llvm/CodeGen/MachineRegisterInfo.h" 34#include "llvm/CodeGen/RegisterScavenging.h" 35#include "llvm/CodeGen/SelectionDAGNodes.h" 36#include "llvm/IR/DataLayout.h" 37#include "llvm/IR/DerivedTypes.h" 38#include "llvm/IR/Function.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/Target/TargetInstrInfo.h" 43#include "llvm/Target/TargetMachine.h" 44#include "llvm/Target/TargetRegisterInfo.h" 45using namespace llvm; 46 47#define DEBUG_TYPE "arm-ldst-opt" 48 49STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 50STATISTIC(NumSTMGened , "Number of stm instructions generated"); 51STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 52STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 53STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 54STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 55STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 56STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 57STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 58STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 59STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 60 61/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 62/// load / store instructions to form ldm / stm instructions. 63 64namespace { 65 struct ARMLoadStoreOpt : public MachineFunctionPass { 66 static char ID; 67 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 68 69 const TargetInstrInfo *TII; 70 const TargetRegisterInfo *TRI; 71 const ARMSubtarget *STI; 72 const TargetLowering *TL; 73 ARMFunctionInfo *AFI; 74 RegScavenger *RS; 75 bool isThumb1, isThumb2; 76 77 bool runOnMachineFunction(MachineFunction &Fn) override; 78 79 const char *getPassName() const override { 80 return "ARM load / store optimization pass"; 81 } 82 83 private: 84 struct MemOpQueueEntry { 85 int Offset; 86 unsigned Reg; 87 bool isKill; 88 unsigned Position; 89 MachineBasicBlock::iterator MBBI; 90 bool Merged; 91 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 92 MachineBasicBlock::iterator i) 93 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 94 }; 95 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 96 typedef MemOpQueue::iterator MemOpQueueIter; 97 98 void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, 99 const MemOpQueue &MemOps, unsigned DefReg, 100 unsigned RangeBegin, unsigned RangeEnd); 101 void UpdateBaseRegUses(MachineBasicBlock &MBB, 102 MachineBasicBlock::iterator MBBI, 103 DebugLoc dl, unsigned Base, unsigned WordOffset, 104 ARMCC::CondCodes Pred, unsigned PredReg); 105 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 106 int Offset, unsigned Base, bool BaseKill, int Opcode, 107 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 108 DebugLoc dl, 109 ArrayRef<std::pair<unsigned, bool> > Regs, 110 ArrayRef<unsigned> ImpDefs); 111 void MergeOpsUpdate(MachineBasicBlock &MBB, 112 MemOpQueue &MemOps, 113 unsigned memOpsBegin, 114 unsigned memOpsEnd, 115 unsigned insertAfter, 116 int Offset, 117 unsigned Base, 118 bool BaseKill, 119 int Opcode, 120 ARMCC::CondCodes Pred, 121 unsigned PredReg, 122 unsigned Scratch, 123 DebugLoc dl, 124 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 125 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 126 int Opcode, unsigned Size, 127 ARMCC::CondCodes Pred, unsigned PredReg, 128 unsigned Scratch, MemOpQueue &MemOps, 129 SmallVectorImpl<MachineBasicBlock::iterator> &Merges); 130 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 131 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 132 MachineBasicBlock::iterator &MBBI); 133 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 134 MachineBasicBlock::iterator MBBI, 135 const TargetInstrInfo *TII, 136 bool &Advance, 137 MachineBasicBlock::iterator &I); 138 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 139 MachineBasicBlock::iterator MBBI, 140 bool &Advance, 141 MachineBasicBlock::iterator &I); 142 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 143 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 144 }; 145 char ARMLoadStoreOpt::ID = 0; 146} 147 148static bool definesCPSR(const MachineInstr *MI) { 149 for (const auto &MO : MI->operands()) { 150 if (!MO.isReg()) 151 continue; 152 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 153 // If the instruction has live CPSR def, then it's not safe to fold it 154 // into load / store. 155 return true; 156 } 157 158 return false; 159} 160 161static int getMemoryOpOffset(const MachineInstr *MI) { 162 int Opcode = MI->getOpcode(); 163 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 164 unsigned NumOperands = MI->getDesc().getNumOperands(); 165 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 166 167 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 168 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 169 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || 170 Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) 171 return OffField; 172 173 // Thumb1 immediate offsets are scaled by 4 174 if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || 175 Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) 176 return OffField * 4; 177 178 int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) 179 : ARM_AM::getAM5Offset(OffField) * 4; 180 ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) 181 : ARM_AM::getAM5Op(OffField); 182 183 if (Op == ARM_AM::sub) 184 return -Offset; 185 186 return Offset; 187} 188 189static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { 190 switch (Opcode) { 191 default: llvm_unreachable("Unhandled opcode!"); 192 case ARM::LDRi12: 193 ++NumLDMGened; 194 switch (Mode) { 195 default: llvm_unreachable("Unhandled submode!"); 196 case ARM_AM::ia: return ARM::LDMIA; 197 case ARM_AM::da: return ARM::LDMDA; 198 case ARM_AM::db: return ARM::LDMDB; 199 case ARM_AM::ib: return ARM::LDMIB; 200 } 201 case ARM::STRi12: 202 ++NumSTMGened; 203 switch (Mode) { 204 default: llvm_unreachable("Unhandled submode!"); 205 case ARM_AM::ia: return ARM::STMIA; 206 case ARM_AM::da: return ARM::STMDA; 207 case ARM_AM::db: return ARM::STMDB; 208 case ARM_AM::ib: return ARM::STMIB; 209 } 210 case ARM::tLDRi: 211 case ARM::tLDRspi: 212 // tLDMIA is writeback-only - unless the base register is in the input 213 // reglist. 214 ++NumLDMGened; 215 switch (Mode) { 216 default: llvm_unreachable("Unhandled submode!"); 217 case ARM_AM::ia: return ARM::tLDMIA; 218 } 219 case ARM::tSTRi: 220 case ARM::tSTRspi: 221 // There is no non-writeback tSTMIA either. 222 ++NumSTMGened; 223 switch (Mode) { 224 default: llvm_unreachable("Unhandled submode!"); 225 case ARM_AM::ia: return ARM::tSTMIA_UPD; 226 } 227 case ARM::t2LDRi8: 228 case ARM::t2LDRi12: 229 ++NumLDMGened; 230 switch (Mode) { 231 default: llvm_unreachable("Unhandled submode!"); 232 case ARM_AM::ia: return ARM::t2LDMIA; 233 case ARM_AM::db: return ARM::t2LDMDB; 234 } 235 case ARM::t2STRi8: 236 case ARM::t2STRi12: 237 ++NumSTMGened; 238 switch (Mode) { 239 default: llvm_unreachable("Unhandled submode!"); 240 case ARM_AM::ia: return ARM::t2STMIA; 241 case ARM_AM::db: return ARM::t2STMDB; 242 } 243 case ARM::VLDRS: 244 ++NumVLDMGened; 245 switch (Mode) { 246 default: llvm_unreachable("Unhandled submode!"); 247 case ARM_AM::ia: return ARM::VLDMSIA; 248 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 249 } 250 case ARM::VSTRS: 251 ++NumVSTMGened; 252 switch (Mode) { 253 default: llvm_unreachable("Unhandled submode!"); 254 case ARM_AM::ia: return ARM::VSTMSIA; 255 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 256 } 257 case ARM::VLDRD: 258 ++NumVLDMGened; 259 switch (Mode) { 260 default: llvm_unreachable("Unhandled submode!"); 261 case ARM_AM::ia: return ARM::VLDMDIA; 262 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 263 } 264 case ARM::VSTRD: 265 ++NumVSTMGened; 266 switch (Mode) { 267 default: llvm_unreachable("Unhandled submode!"); 268 case ARM_AM::ia: return ARM::VSTMDIA; 269 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 270 } 271 } 272} 273 274namespace llvm { 275 namespace ARM_AM { 276 277AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 278 switch (Opcode) { 279 default: llvm_unreachable("Unhandled opcode!"); 280 case ARM::LDMIA_RET: 281 case ARM::LDMIA: 282 case ARM::LDMIA_UPD: 283 case ARM::STMIA: 284 case ARM::STMIA_UPD: 285 case ARM::tLDMIA: 286 case ARM::tLDMIA_UPD: 287 case ARM::tSTMIA_UPD: 288 case ARM::t2LDMIA_RET: 289 case ARM::t2LDMIA: 290 case ARM::t2LDMIA_UPD: 291 case ARM::t2STMIA: 292 case ARM::t2STMIA_UPD: 293 case ARM::VLDMSIA: 294 case ARM::VLDMSIA_UPD: 295 case ARM::VSTMSIA: 296 case ARM::VSTMSIA_UPD: 297 case ARM::VLDMDIA: 298 case ARM::VLDMDIA_UPD: 299 case ARM::VSTMDIA: 300 case ARM::VSTMDIA_UPD: 301 return ARM_AM::ia; 302 303 case ARM::LDMDA: 304 case ARM::LDMDA_UPD: 305 case ARM::STMDA: 306 case ARM::STMDA_UPD: 307 return ARM_AM::da; 308 309 case ARM::LDMDB: 310 case ARM::LDMDB_UPD: 311 case ARM::STMDB: 312 case ARM::STMDB_UPD: 313 case ARM::t2LDMDB: 314 case ARM::t2LDMDB_UPD: 315 case ARM::t2STMDB: 316 case ARM::t2STMDB_UPD: 317 case ARM::VLDMSDB_UPD: 318 case ARM::VSTMSDB_UPD: 319 case ARM::VLDMDDB_UPD: 320 case ARM::VSTMDDB_UPD: 321 return ARM_AM::db; 322 323 case ARM::LDMIB: 324 case ARM::LDMIB_UPD: 325 case ARM::STMIB: 326 case ARM::STMIB_UPD: 327 return ARM_AM::ib; 328 } 329} 330 331 } // end namespace ARM_AM 332} // end namespace llvm 333 334static bool isT1i32Load(unsigned Opc) { 335 return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; 336} 337 338static bool isT2i32Load(unsigned Opc) { 339 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 340} 341 342static bool isi32Load(unsigned Opc) { 343 return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; 344} 345 346static bool isT1i32Store(unsigned Opc) { 347 return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; 348} 349 350static bool isT2i32Store(unsigned Opc) { 351 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 352} 353 354static bool isi32Store(unsigned Opc) { 355 return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); 356} 357 358static unsigned getImmScale(unsigned Opc) { 359 switch (Opc) { 360 default: llvm_unreachable("Unhandled opcode!"); 361 case ARM::tLDRi: 362 case ARM::tSTRi: 363 case ARM::tLDRspi: 364 case ARM::tSTRspi: 365 return 1; 366 case ARM::tLDRHi: 367 case ARM::tSTRHi: 368 return 2; 369 case ARM::tLDRBi: 370 case ARM::tSTRBi: 371 return 4; 372 } 373} 374 375/// Update future uses of the base register with the offset introduced 376/// due to writeback. This function only works on Thumb1. 377void 378ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, 379 MachineBasicBlock::iterator MBBI, 380 DebugLoc dl, unsigned Base, 381 unsigned WordOffset, 382 ARMCC::CondCodes Pred, unsigned PredReg) { 383 assert(isThumb1 && "Can only update base register uses for Thumb1!"); 384 // Start updating any instructions with immediate offsets. Insert a SUB before 385 // the first non-updateable instruction (if any). 386 for (; MBBI != MBB.end(); ++MBBI) { 387 bool InsertSub = false; 388 unsigned Opc = MBBI->getOpcode(); 389 390 if (MBBI->readsRegister(Base)) { 391 int Offset; 392 bool IsLoad = 393 Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; 394 bool IsStore = 395 Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; 396 397 if (IsLoad || IsStore) { 398 // Loads and stores with immediate offsets can be updated, but only if 399 // the new offset isn't negative. 400 // The MachineOperand containing the offset immediate is the last one 401 // before predicates. 402 MachineOperand &MO = 403 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 404 // The offsets are scaled by 1, 2 or 4 depending on the Opcode. 405 Offset = MO.getImm() - WordOffset * getImmScale(Opc); 406 407 // If storing the base register, it needs to be reset first. 408 unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); 409 410 if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) 411 MO.setImm(Offset); 412 else 413 InsertSub = true; 414 415 } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && 416 !definesCPSR(MBBI)) { 417 // SUBS/ADDS using this register, with a dead def of the CPSR. 418 // Merge it with the update; if the merged offset is too large, 419 // insert a new sub instead. 420 MachineOperand &MO = 421 MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); 422 Offset = (Opc == ARM::tSUBi8) ? 423 MO.getImm() + WordOffset * 4 : 424 MO.getImm() - WordOffset * 4 ; 425 if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { 426 // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if 427 // Offset == 0. 428 MO.setImm(Offset); 429 // The base register has now been reset, so exit early. 430 return; 431 } else { 432 InsertSub = true; 433 } 434 435 } else { 436 // Can't update the instruction. 437 InsertSub = true; 438 } 439 440 } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { 441 // Since SUBS sets the condition flags, we can't place the base reset 442 // after an instruction that has a live CPSR def. 443 // The base register might also contain an argument for a function call. 444 InsertSub = true; 445 } 446 447 if (InsertSub) { 448 // An instruction above couldn't be updated, so insert a sub. 449 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 450 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) 451 .addImm(Pred).addReg(PredReg); 452 return; 453 } 454 455 if (MBBI->killsRegister(Base)) 456 // Register got killed. Stop updating. 457 return; 458 } 459 460 // End of block was reached. 461 if (MBB.succ_size() > 0) { 462 // FIXME: Because of a bug, live registers are sometimes missing from 463 // the successor blocks' live-in sets. This means we can't trust that 464 // information and *always* have to reset at the end of a block. 465 // See PR21029. 466 if (MBBI != MBB.end()) --MBBI; 467 AddDefaultT1CC( 468 BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) 469 .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) 470 .addImm(Pred).addReg(PredReg); 471 } 472} 473 474/// MergeOps - Create and insert a LDM or STM with Base as base register and 475/// registers in Regs as the register operands that would be loaded / stored. 476/// It returns true if the transformation is done. 477bool 478ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 479 MachineBasicBlock::iterator MBBI, 480 int Offset, unsigned Base, bool BaseKill, 481 int Opcode, ARMCC::CondCodes Pred, 482 unsigned PredReg, unsigned Scratch, DebugLoc dl, 483 ArrayRef<std::pair<unsigned, bool> > Regs, 484 ArrayRef<unsigned> ImpDefs) { 485 // Only a single register to load / store. Don't bother. 486 unsigned NumRegs = Regs.size(); 487 if (NumRegs <= 1) 488 return false; 489 490 // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. 491 // Compute liveness information for that register to make the decision. 492 bool SafeToClobberCPSR = !isThumb1 || 493 (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == 494 MachineBasicBlock::LQR_Dead); 495 496 bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. 497 498 // Exception: If the base register is in the input reglist, Thumb1 LDM is 499 // non-writeback. 500 // It's also not possible to merge an STR of the base register in Thumb1. 501 if (isThumb1) 502 for (unsigned I = 0; I < NumRegs; ++I) 503 if (Base == Regs[I].first) { 504 assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); 505 if (Opcode == ARM::tLDRi) { 506 Writeback = false; 507 break; 508 } else if (Opcode == ARM::tSTRi) { 509 return false; 510 } 511 } 512 513 ARM_AM::AMSubMode Mode = ARM_AM::ia; 514 // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. 515 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 516 bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; 517 518 if (Offset == 4 && haveIBAndDA) { 519 Mode = ARM_AM::ib; 520 } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { 521 Mode = ARM_AM::da; 522 } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { 523 // VLDM/VSTM do not support DB mode without also updating the base reg. 524 Mode = ARM_AM::db; 525 } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { 526 // Check if this is a supported opcode before inserting instructions to 527 // calculate a new base register. 528 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 529 530 // If starting offset isn't zero, insert a MI to materialize a new base. 531 // But only do so if it is cost effective, i.e. merging more than two 532 // loads / stores. 533 if (NumRegs <= 2) 534 return false; 535 536 // On Thumb1, it's not worth materializing a new base register without 537 // clobbering the CPSR (i.e. not using ADDS/SUBS). 538 if (!SafeToClobberCPSR) 539 return false; 540 541 unsigned NewBase; 542 if (isi32Load(Opcode)) { 543 // If it is a load, then just use one of the destination register to 544 // use as the new base. 545 NewBase = Regs[NumRegs-1].first; 546 } else { 547 // Use the scratch register to use as a new base. 548 NewBase = Scratch; 549 if (NewBase == 0) 550 return false; 551 } 552 553 int BaseOpc = 554 isThumb2 ? ARM::t2ADDri : 555 (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : 556 (isThumb1 && Offset < 8) ? ARM::tADDi3 : 557 isThumb1 ? ARM::tADDi8 : ARM::ADDri; 558 559 if (Offset < 0) { 560 Offset = - Offset; 561 BaseOpc = 562 isThumb2 ? ARM::t2SUBri : 563 (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : 564 isThumb1 ? ARM::tSUBi8 : ARM::SUBri; 565 } 566 567 if (!TL->isLegalAddImmediate(Offset)) 568 // FIXME: Try add with register operand? 569 return false; // Probably not worth it then. 570 571 if (isThumb1) { 572 // Thumb1: depending on immediate size, use either 573 // ADDS NewBase, Base, #imm3 574 // or 575 // MOV NewBase, Base 576 // ADDS NewBase, #imm8. 577 if (Base != NewBase && 578 (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { 579 // Need to insert a MOV to the new base first. 580 if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && 581 !STI->hasV6Ops()) { 582 // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr 583 if (Pred != ARMCC::AL) 584 return false; 585 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) 586 .addReg(Base, getKillRegState(BaseKill)); 587 } else 588 BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) 589 .addReg(Base, getKillRegState(BaseKill)) 590 .addImm(Pred).addReg(PredReg); 591 592 // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 593 Base = NewBase; 594 BaseKill = false; 595 } 596 if (BaseOpc == ARM::tADDrSPi) { 597 assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); 598 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 599 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) 600 .addImm(Pred).addReg(PredReg); 601 } else 602 AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) 603 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 604 .addImm(Pred).addReg(PredReg); 605 } else { 606 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 607 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 608 .addImm(Pred).addReg(PredReg).addReg(0); 609 } 610 Base = NewBase; 611 BaseKill = true; // New base is always killed straight away. 612 } 613 614 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 615 Opcode == ARM::VLDRD); 616 617 // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with 618 // base register writeback. 619 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 620 if (!Opcode) return false; 621 622 // Check if a Thumb1 LDM/STM merge is safe. This is the case if: 623 // - There is no writeback (LDM of base register), 624 // - the base register is killed by the merged instruction, 625 // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS 626 // to reset the base register. 627 // Otherwise, don't merge. 628 // It's safe to return here since the code to materialize a new base register 629 // above is also conditional on SafeToClobberCPSR. 630 if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) 631 return false; 632 633 MachineInstrBuilder MIB; 634 635 if (Writeback) { 636 if (Opcode == ARM::tLDMIA) 637 // Update tLDMIA with writeback if necessary. 638 Opcode = ARM::tLDMIA_UPD; 639 640 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 641 642 // Thumb1: we might need to set base writeback when building the MI. 643 MIB.addReg(Base, getDefRegState(true)) 644 .addReg(Base, getKillRegState(BaseKill)); 645 646 // The base isn't dead after a merged instruction with writeback. 647 // Insert a sub instruction after the newly formed instruction to reset. 648 if (!BaseKill) 649 UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); 650 651 } else { 652 // No writeback, simply build the MachineInstr. 653 MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); 654 MIB.addReg(Base, getKillRegState(BaseKill)); 655 } 656 657 MIB.addImm(Pred).addReg(PredReg); 658 659 for (unsigned i = 0; i != NumRegs; ++i) 660 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 661 | getKillRegState(Regs[i].second)); 662 663 // Add implicit defs for super-registers. 664 for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) 665 MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); 666 667 return true; 668} 669 670/// \brief Find all instructions using a given imp-def within a range. 671/// 672/// We are trying to combine a range of instructions, one of which (located at 673/// position RangeBegin) implicitly defines a register. The final LDM/STM will 674/// be placed at RangeEnd, and so any uses of this definition between RangeStart 675/// and RangeEnd must be modified to use an undefined value. 676/// 677/// The live range continues until we find a second definition or one of the 678/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so 679/// we must consider all uses and decide which are relevant in a second pass. 680void ARMLoadStoreOpt::findUsesOfImpDef( 681 SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, 682 unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { 683 std::map<unsigned, MachineOperand *> Uses; 684 unsigned LastLivePos = RangeEnd; 685 686 // First we find all uses of this register with Position between RangeBegin 687 // and RangeEnd, any or all of these could be uses of a definition at 688 // RangeBegin. We also record the latest position a definition at RangeBegin 689 // would be considered live. 690 for (unsigned i = 0; i < MemOps.size(); ++i) { 691 MachineInstr &MI = *MemOps[i].MBBI; 692 unsigned MIPosition = MemOps[i].Position; 693 if (MIPosition <= RangeBegin || MIPosition > RangeEnd) 694 continue; 695 696 // If this instruction defines the register, then any later use will be of 697 // that definition rather than ours. 698 if (MI.definesRegister(DefReg)) 699 LastLivePos = std::min(LastLivePos, MIPosition); 700 701 MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); 702 if (!UseOp) 703 continue; 704 705 // If this instruction kills the register then (assuming liveness is 706 // correct when we start) we don't need to think about anything after here. 707 if (UseOp->isKill()) 708 LastLivePos = std::min(LastLivePos, MIPosition); 709 710 Uses[MIPosition] = UseOp; 711 } 712 713 // Now we traverse the list of all uses, and append the ones that actually use 714 // our definition to the requested list. 715 for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), 716 E = Uses.end(); 717 I != E; ++I) { 718 // List is sorted by position so once we've found one out of range there 719 // will be no more to consider. 720 if (I->first > LastLivePos) 721 break; 722 UsesOfImpDefs.push_back(I->second); 723 } 724} 725 726// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 727// success. 728void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 729 MemOpQueue &memOps, 730 unsigned memOpsBegin, unsigned memOpsEnd, 731 unsigned insertAfter, int Offset, 732 unsigned Base, bool BaseKill, 733 int Opcode, 734 ARMCC::CondCodes Pred, unsigned PredReg, 735 unsigned Scratch, 736 DebugLoc dl, 737 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 738 // First calculate which of the registers should be killed by the merged 739 // instruction. 740 const unsigned insertPos = memOps[insertAfter].Position; 741 SmallSet<unsigned, 4> KilledRegs; 742 DenseMap<unsigned, unsigned> Killer; 743 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 744 if (i == memOpsBegin) { 745 i = memOpsEnd; 746 if (i == e) 747 break; 748 } 749 if (memOps[i].Position < insertPos && memOps[i].isKill) { 750 unsigned Reg = memOps[i].Reg; 751 KilledRegs.insert(Reg); 752 Killer[Reg] = i; 753 } 754 } 755 756 SmallVector<std::pair<unsigned, bool>, 8> Regs; 757 SmallVector<unsigned, 8> ImpDefs; 758 SmallVector<MachineOperand *, 8> UsesOfImpDefs; 759 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 760 unsigned Reg = memOps[i].Reg; 761 // If we are inserting the merged operation after an operation that 762 // uses the same register, make sure to transfer any kill flag. 763 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 764 Regs.push_back(std::make_pair(Reg, isKill)); 765 766 // Collect any implicit defs of super-registers. They must be preserved. 767 for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { 768 if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) 769 continue; 770 unsigned DefReg = MO->getReg(); 771 if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) 772 ImpDefs.push_back(DefReg); 773 774 // There may be other uses of the definition between this instruction and 775 // the eventual LDM/STM position. These should be marked undef if the 776 // merge takes place. 777 findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, 778 insertPos); 779 } 780 } 781 782 // Try to do the merge. 783 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 784 ++Loc; 785 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 786 Pred, PredReg, Scratch, dl, Regs, ImpDefs)) 787 return; 788 789 // Merge succeeded, update records. 790 Merges.push_back(std::prev(Loc)); 791 792 // In gathering loads together, we may have moved the imp-def of a register 793 // past one of its uses. This is OK, since we know better than the rest of 794 // LLVM what's OK with ARM loads and stores; but we still have to adjust the 795 // affected uses. 796 for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), 797 E = UsesOfImpDefs.end(); 798 I != E; ++I) 799 (*I)->setIsUndef(); 800 801 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 802 // Remove kill flags from any memops that come before insertPos. 803 if (Regs[i-memOpsBegin].second) { 804 unsigned Reg = Regs[i-memOpsBegin].first; 805 if (KilledRegs.count(Reg)) { 806 unsigned j = Killer[Reg]; 807 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 808 assert(Idx >= 0 && "Cannot find killing operand"); 809 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 810 memOps[j].isKill = false; 811 } 812 memOps[i].isKill = true; 813 } 814 MBB.erase(memOps[i].MBBI); 815 // Update this memop to refer to the merged instruction. 816 // We may need to move kill flags again. 817 memOps[i].Merged = true; 818 memOps[i].MBBI = Merges.back(); 819 memOps[i].Position = insertPos; 820 } 821 822 // Update memOps offsets, since they may have been modified by MergeOps. 823 for (auto &MemOp : memOps) { 824 MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); 825 } 826} 827 828/// MergeLDR_STR - Merge a number of load / store instructions into one or more 829/// load / store multiple instructions. 830void 831ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 832 unsigned Base, int Opcode, unsigned Size, 833 ARMCC::CondCodes Pred, unsigned PredReg, 834 unsigned Scratch, MemOpQueue &MemOps, 835 SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { 836 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 837 int Offset = MemOps[SIndex].Offset; 838 int SOffset = Offset; 839 unsigned insertAfter = SIndex; 840 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 841 DebugLoc dl = Loc->getDebugLoc(); 842 const MachineOperand &PMO = Loc->getOperand(0); 843 unsigned PReg = PMO.getReg(); 844 unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); 845 unsigned Count = 1; 846 unsigned Limit = ~0U; 847 bool BaseKill = false; 848 // vldm / vstm limit are 32 for S variants, 16 for D variants. 849 850 switch (Opcode) { 851 default: break; 852 case ARM::VSTRS: 853 Limit = 32; 854 break; 855 case ARM::VSTRD: 856 Limit = 16; 857 break; 858 case ARM::VLDRD: 859 Limit = 16; 860 break; 861 case ARM::VLDRS: 862 Limit = 32; 863 break; 864 } 865 866 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 867 int NewOffset = MemOps[i].Offset; 868 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 869 unsigned Reg = MO.getReg(); 870 unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); 871 // Register numbers must be in ascending order. For VFP / NEON load and 872 // store multiples, the registers must also be consecutive and within the 873 // limit on the number of registers per instruction. 874 if (Reg != ARM::SP && 875 NewOffset == Offset + (int)Size && 876 ((isNotVFP && RegNum > PRegNum) || 877 ((Count < Limit) && RegNum == PRegNum+1)) && 878 // On Swift we don't want vldm/vstm to start with a odd register num 879 // because Q register unaligned vldm/vstm need more uops. 880 (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { 881 Offset += Size; 882 PRegNum = RegNum; 883 ++Count; 884 } else { 885 // Can't merge this in. Try merge the earlier ones first. 886 // We need to compute BaseKill here because the MemOps may have been 887 // reordered. 888 BaseKill = Loc->killsRegister(Base); 889 890 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, 891 BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 892 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 893 MemOps, Merges); 894 return; 895 } 896 897 if (MemOps[i].Position > MemOps[insertAfter].Position) { 898 insertAfter = i; 899 Loc = MemOps[i].MBBI; 900 } 901 } 902 903 BaseKill = Loc->killsRegister(Base); 904 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 905 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 906} 907 908static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 909 unsigned Bytes, unsigned Limit, 910 ARMCC::CondCodes Pred, unsigned PredReg) { 911 unsigned MyPredReg = 0; 912 if (!MI) 913 return false; 914 915 bool CheckCPSRDef = false; 916 switch (MI->getOpcode()) { 917 default: return false; 918 case ARM::tSUBi8: 919 case ARM::t2SUBri: 920 case ARM::SUBri: 921 CheckCPSRDef = true; 922 // fallthrough 923 case ARM::tSUBspi: 924 break; 925 } 926 927 // Make sure the offset fits in 8 bits. 928 if (Bytes == 0 || (Limit && Bytes >= Limit)) 929 return false; 930 931 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || 932 MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME 933 if (!(MI->getOperand(0).getReg() == Base && 934 MI->getOperand(1).getReg() == Base && 935 (MI->getOperand(2).getImm() * Scale) == Bytes && 936 getInstrPredicate(MI, MyPredReg) == Pred && 937 MyPredReg == PredReg)) 938 return false; 939 940 return CheckCPSRDef ? !definesCPSR(MI) : true; 941} 942 943static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 944 unsigned Bytes, unsigned Limit, 945 ARMCC::CondCodes Pred, unsigned PredReg) { 946 unsigned MyPredReg = 0; 947 if (!MI) 948 return false; 949 950 bool CheckCPSRDef = false; 951 switch (MI->getOpcode()) { 952 default: return false; 953 case ARM::tADDi8: 954 case ARM::t2ADDri: 955 case ARM::ADDri: 956 CheckCPSRDef = true; 957 // fallthrough 958 case ARM::tADDspi: 959 break; 960 } 961 962 if (Bytes == 0 || (Limit && Bytes >= Limit)) 963 // Make sure the offset fits in 8 bits. 964 return false; 965 966 unsigned Scale = (MI->getOpcode() == ARM::tADDspi || 967 MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME 968 if (!(MI->getOperand(0).getReg() == Base && 969 MI->getOperand(1).getReg() == Base && 970 (MI->getOperand(2).getImm() * Scale) == Bytes && 971 getInstrPredicate(MI, MyPredReg) == Pred && 972 MyPredReg == PredReg)) 973 return false; 974 975 return CheckCPSRDef ? !definesCPSR(MI) : true; 976} 977 978static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 979 switch (MI->getOpcode()) { 980 default: return 0; 981 case ARM::LDRi12: 982 case ARM::STRi12: 983 case ARM::tLDRi: 984 case ARM::tSTRi: 985 case ARM::tLDRspi: 986 case ARM::tSTRspi: 987 case ARM::t2LDRi8: 988 case ARM::t2LDRi12: 989 case ARM::t2STRi8: 990 case ARM::t2STRi12: 991 case ARM::VLDRS: 992 case ARM::VSTRS: 993 return 4; 994 case ARM::VLDRD: 995 case ARM::VSTRD: 996 return 8; 997 case ARM::LDMIA: 998 case ARM::LDMDA: 999 case ARM::LDMDB: 1000 case ARM::LDMIB: 1001 case ARM::STMIA: 1002 case ARM::STMDA: 1003 case ARM::STMDB: 1004 case ARM::STMIB: 1005 case ARM::tLDMIA: 1006 case ARM::tLDMIA_UPD: 1007 case ARM::tSTMIA_UPD: 1008 case ARM::t2LDMIA: 1009 case ARM::t2LDMDB: 1010 case ARM::t2STMIA: 1011 case ARM::t2STMDB: 1012 case ARM::VLDMSIA: 1013 case ARM::VSTMSIA: 1014 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 1015 case ARM::VLDMDIA: 1016 case ARM::VSTMDIA: 1017 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 1018 } 1019} 1020 1021static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 1022 ARM_AM::AMSubMode Mode) { 1023 switch (Opc) { 1024 default: llvm_unreachable("Unhandled opcode!"); 1025 case ARM::LDMIA: 1026 case ARM::LDMDA: 1027 case ARM::LDMDB: 1028 case ARM::LDMIB: 1029 switch (Mode) { 1030 default: llvm_unreachable("Unhandled submode!"); 1031 case ARM_AM::ia: return ARM::LDMIA_UPD; 1032 case ARM_AM::ib: return ARM::LDMIB_UPD; 1033 case ARM_AM::da: return ARM::LDMDA_UPD; 1034 case ARM_AM::db: return ARM::LDMDB_UPD; 1035 } 1036 case ARM::STMIA: 1037 case ARM::STMDA: 1038 case ARM::STMDB: 1039 case ARM::STMIB: 1040 switch (Mode) { 1041 default: llvm_unreachable("Unhandled submode!"); 1042 case ARM_AM::ia: return ARM::STMIA_UPD; 1043 case ARM_AM::ib: return ARM::STMIB_UPD; 1044 case ARM_AM::da: return ARM::STMDA_UPD; 1045 case ARM_AM::db: return ARM::STMDB_UPD; 1046 } 1047 case ARM::t2LDMIA: 1048 case ARM::t2LDMDB: 1049 switch (Mode) { 1050 default: llvm_unreachable("Unhandled submode!"); 1051 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 1052 case ARM_AM::db: return ARM::t2LDMDB_UPD; 1053 } 1054 case ARM::t2STMIA: 1055 case ARM::t2STMDB: 1056 switch (Mode) { 1057 default: llvm_unreachable("Unhandled submode!"); 1058 case ARM_AM::ia: return ARM::t2STMIA_UPD; 1059 case ARM_AM::db: return ARM::t2STMDB_UPD; 1060 } 1061 case ARM::VLDMSIA: 1062 switch (Mode) { 1063 default: llvm_unreachable("Unhandled submode!"); 1064 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 1065 case ARM_AM::db: return ARM::VLDMSDB_UPD; 1066 } 1067 case ARM::VLDMDIA: 1068 switch (Mode) { 1069 default: llvm_unreachable("Unhandled submode!"); 1070 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 1071 case ARM_AM::db: return ARM::VLDMDDB_UPD; 1072 } 1073 case ARM::VSTMSIA: 1074 switch (Mode) { 1075 default: llvm_unreachable("Unhandled submode!"); 1076 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 1077 case ARM_AM::db: return ARM::VSTMSDB_UPD; 1078 } 1079 case ARM::VSTMDIA: 1080 switch (Mode) { 1081 default: llvm_unreachable("Unhandled submode!"); 1082 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 1083 case ARM_AM::db: return ARM::VSTMDDB_UPD; 1084 } 1085 } 1086} 1087 1088/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 1089/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 1090/// 1091/// stmia rn, <ra, rb, rc> 1092/// rn := rn + 4 * 3; 1093/// => 1094/// stmia rn!, <ra, rb, rc> 1095/// 1096/// rn := rn - 4 * 3; 1097/// ldmia rn, <ra, rb, rc> 1098/// => 1099/// ldmdb rn!, <ra, rb, rc> 1100bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 1101 MachineBasicBlock::iterator MBBI, 1102 bool &Advance, 1103 MachineBasicBlock::iterator &I) { 1104 // Thumb1 is already using updating loads/stores. 1105 if (isThumb1) return false; 1106 1107 MachineInstr *MI = MBBI; 1108 unsigned Base = MI->getOperand(0).getReg(); 1109 bool BaseKill = MI->getOperand(0).isKill(); 1110 unsigned Bytes = getLSMultipleTransferSize(MI); 1111 unsigned PredReg = 0; 1112 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1113 int Opcode = MI->getOpcode(); 1114 DebugLoc dl = MI->getDebugLoc(); 1115 1116 // Can't use an updating ld/st if the base register is also a dest 1117 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 1118 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 1119 if (MI->getOperand(i).getReg() == Base) 1120 return false; 1121 1122 bool DoMerge = false; 1123 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 1124 1125 // Try merging with the previous instruction. 1126 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1127 if (MBBI != BeginMBBI) { 1128 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1129 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1130 --PrevMBBI; 1131 if (Mode == ARM_AM::ia && 1132 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1133 Mode = ARM_AM::db; 1134 DoMerge = true; 1135 } else if (Mode == ARM_AM::ib && 1136 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 1137 Mode = ARM_AM::da; 1138 DoMerge = true; 1139 } 1140 if (DoMerge) 1141 MBB.erase(PrevMBBI); 1142 } 1143 1144 // Try merging with the next instruction. 1145 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1146 if (!DoMerge && MBBI != EndMBBI) { 1147 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1148 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1149 ++NextMBBI; 1150 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 1151 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1152 DoMerge = true; 1153 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 1154 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 1155 DoMerge = true; 1156 } 1157 if (DoMerge) { 1158 if (NextMBBI == I) { 1159 Advance = true; 1160 ++I; 1161 } 1162 MBB.erase(NextMBBI); 1163 } 1164 } 1165 1166 if (!DoMerge) 1167 return false; 1168 1169 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 1170 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1171 .addReg(Base, getDefRegState(true)) // WB base register 1172 .addReg(Base, getKillRegState(BaseKill)) 1173 .addImm(Pred).addReg(PredReg); 1174 1175 // Transfer the rest of operands. 1176 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 1177 MIB.addOperand(MI->getOperand(OpNum)); 1178 1179 // Transfer memoperands. 1180 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 1181 1182 MBB.erase(MBBI); 1183 return true; 1184} 1185 1186static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 1187 ARM_AM::AddrOpc Mode) { 1188 switch (Opc) { 1189 case ARM::LDRi12: 1190 return ARM::LDR_PRE_IMM; 1191 case ARM::STRi12: 1192 return ARM::STR_PRE_IMM; 1193 case ARM::VLDRS: 1194 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1195 case ARM::VLDRD: 1196 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1197 case ARM::VSTRS: 1198 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1199 case ARM::VSTRD: 1200 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1201 case ARM::t2LDRi8: 1202 case ARM::t2LDRi12: 1203 return ARM::t2LDR_PRE; 1204 case ARM::t2STRi8: 1205 case ARM::t2STRi12: 1206 return ARM::t2STR_PRE; 1207 default: llvm_unreachable("Unhandled opcode!"); 1208 } 1209} 1210 1211static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 1212 ARM_AM::AddrOpc Mode) { 1213 switch (Opc) { 1214 case ARM::LDRi12: 1215 return ARM::LDR_POST_IMM; 1216 case ARM::STRi12: 1217 return ARM::STR_POST_IMM; 1218 case ARM::VLDRS: 1219 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 1220 case ARM::VLDRD: 1221 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 1222 case ARM::VSTRS: 1223 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 1224 case ARM::VSTRD: 1225 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 1226 case ARM::t2LDRi8: 1227 case ARM::t2LDRi12: 1228 return ARM::t2LDR_POST; 1229 case ARM::t2STRi8: 1230 case ARM::t2STRi12: 1231 return ARM::t2STR_POST; 1232 default: llvm_unreachable("Unhandled opcode!"); 1233 } 1234} 1235 1236/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 1237/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 1238bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 1239 MachineBasicBlock::iterator MBBI, 1240 const TargetInstrInfo *TII, 1241 bool &Advance, 1242 MachineBasicBlock::iterator &I) { 1243 // Thumb1 doesn't have updating LDR/STR. 1244 // FIXME: Use LDM/STM with single register instead. 1245 if (isThumb1) return false; 1246 1247 MachineInstr *MI = MBBI; 1248 unsigned Base = MI->getOperand(1).getReg(); 1249 bool BaseKill = MI->getOperand(1).isKill(); 1250 unsigned Bytes = getLSMultipleTransferSize(MI); 1251 int Opcode = MI->getOpcode(); 1252 DebugLoc dl = MI->getDebugLoc(); 1253 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 1254 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 1255 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 1256 if (isi32Load(Opcode) || isi32Store(Opcode)) 1257 if (MI->getOperand(2).getImm() != 0) 1258 return false; 1259 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 1260 return false; 1261 1262 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 1263 // Can't do the merge if the destination register is the same as the would-be 1264 // writeback register. 1265 if (MI->getOperand(0).getReg() == Base) 1266 return false; 1267 1268 unsigned PredReg = 0; 1269 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1270 bool DoMerge = false; 1271 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1272 unsigned NewOpc = 0; 1273 // AM2 - 12 bits, thumb2 - 8 bits. 1274 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 1275 1276 // Try merging with the previous instruction. 1277 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 1278 if (MBBI != BeginMBBI) { 1279 MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); 1280 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 1281 --PrevMBBI; 1282 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1283 DoMerge = true; 1284 AddSub = ARM_AM::sub; 1285 } else if (!isAM5 && 1286 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1287 DoMerge = true; 1288 } 1289 if (DoMerge) { 1290 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 1291 MBB.erase(PrevMBBI); 1292 } 1293 } 1294 1295 // Try merging with the next instruction. 1296 MachineBasicBlock::iterator EndMBBI = MBB.end(); 1297 if (!DoMerge && MBBI != EndMBBI) { 1298 MachineBasicBlock::iterator NextMBBI = std::next(MBBI); 1299 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 1300 ++NextMBBI; 1301 if (!isAM5 && 1302 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 1303 DoMerge = true; 1304 AddSub = ARM_AM::sub; 1305 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 1306 DoMerge = true; 1307 } 1308 if (DoMerge) { 1309 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 1310 if (NextMBBI == I) { 1311 Advance = true; 1312 ++I; 1313 } 1314 MBB.erase(NextMBBI); 1315 } 1316 } 1317 1318 if (!DoMerge) 1319 return false; 1320 1321 if (isAM5) { 1322 // VLDM[SD]_UPD, VSTM[SD]_UPD 1323 // (There are no base-updating versions of VLDR/VSTR instructions, but the 1324 // updating load/store-multiple instructions can be used with only one 1325 // register.) 1326 MachineOperand &MO = MI->getOperand(0); 1327 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 1328 .addReg(Base, getDefRegState(true)) // WB base register 1329 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 1330 .addImm(Pred).addReg(PredReg) 1331 .addReg(MO.getReg(), (isLd ? getDefRegState(true) : 1332 getKillRegState(MO.isKill()))); 1333 } else if (isLd) { 1334 if (isAM2) { 1335 // LDR_PRE, LDR_POST 1336 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 1337 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1338 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1339 .addReg(Base, RegState::Define) 1340 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1341 } else { 1342 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1343 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1344 .addReg(Base, RegState::Define) 1345 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1346 } 1347 } else { 1348 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1349 // t2LDR_PRE, t2LDR_POST 1350 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 1351 .addReg(Base, RegState::Define) 1352 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1353 } 1354 } else { 1355 MachineOperand &MO = MI->getOperand(0); 1356 // FIXME: post-indexed stores use am2offset_imm, which still encodes 1357 // the vestigal zero-reg offset register. When that's fixed, this clause 1358 // can be removed entirely. 1359 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 1360 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 1361 // STR_PRE, STR_POST 1362 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1363 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1364 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 1365 } else { 1366 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 1367 // t2STR_PRE, t2STR_POST 1368 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 1369 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 1370 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 1371 } 1372 } 1373 MBB.erase(MBBI); 1374 1375 return true; 1376} 1377 1378/// isMemoryOp - Returns true if instruction is a memory operation that this 1379/// pass is capable of operating on. 1380static bool isMemoryOp(const MachineInstr *MI) { 1381 // When no memory operands are present, conservatively assume unaligned, 1382 // volatile, unfoldable. 1383 if (!MI->hasOneMemOperand()) 1384 return false; 1385 1386 const MachineMemOperand *MMO = *MI->memoperands_begin(); 1387 1388 // Don't touch volatile memory accesses - we may be changing their order. 1389 if (MMO->isVolatile()) 1390 return false; 1391 1392 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 1393 // not. 1394 if (MMO->getAlignment() < 4) 1395 return false; 1396 1397 // str <undef> could probably be eliminated entirely, but for now we just want 1398 // to avoid making a mess of it. 1399 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 1400 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 1401 MI->getOperand(0).isUndef()) 1402 return false; 1403 1404 // Likewise don't mess with references to undefined addresses. 1405 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 1406 MI->getOperand(1).isUndef()) 1407 return false; 1408 1409 int Opcode = MI->getOpcode(); 1410 switch (Opcode) { 1411 default: break; 1412 case ARM::VLDRS: 1413 case ARM::VSTRS: 1414 return MI->getOperand(1).isReg(); 1415 case ARM::VLDRD: 1416 case ARM::VSTRD: 1417 return MI->getOperand(1).isReg(); 1418 case ARM::LDRi12: 1419 case ARM::STRi12: 1420 case ARM::tLDRi: 1421 case ARM::tSTRi: 1422 case ARM::tLDRspi: 1423 case ARM::tSTRspi: 1424 case ARM::t2LDRi8: 1425 case ARM::t2LDRi12: 1426 case ARM::t2STRi8: 1427 case ARM::t2STRi12: 1428 return MI->getOperand(1).isReg(); 1429 } 1430 return false; 1431} 1432 1433/// AdvanceRS - Advance register scavenger to just before the earliest memory 1434/// op that is being merged. 1435void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 1436 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 1437 unsigned Position = MemOps[0].Position; 1438 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 1439 if (MemOps[i].Position < Position) { 1440 Position = MemOps[i].Position; 1441 Loc = MemOps[i].MBBI; 1442 } 1443 } 1444 1445 if (Loc != MBB.begin()) 1446 RS->forward(std::prev(Loc)); 1447} 1448 1449static void InsertLDR_STR(MachineBasicBlock &MBB, 1450 MachineBasicBlock::iterator &MBBI, 1451 int Offset, bool isDef, 1452 DebugLoc dl, unsigned NewOpc, 1453 unsigned Reg, bool RegDeadKill, bool RegUndef, 1454 unsigned BaseReg, bool BaseKill, bool BaseUndef, 1455 bool OffKill, bool OffUndef, 1456 ARMCC::CondCodes Pred, unsigned PredReg, 1457 const TargetInstrInfo *TII, bool isT2) { 1458 if (isDef) { 1459 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1460 TII->get(NewOpc)) 1461 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 1462 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1463 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1464 } else { 1465 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 1466 TII->get(NewOpc)) 1467 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 1468 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 1469 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1470 } 1471} 1472 1473bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 1474 MachineBasicBlock::iterator &MBBI) { 1475 MachineInstr *MI = &*MBBI; 1476 unsigned Opcode = MI->getOpcode(); 1477 if (Opcode == ARM::LDRD || Opcode == ARM::STRD || 1478 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { 1479 const MachineOperand &BaseOp = MI->getOperand(2); 1480 unsigned BaseReg = BaseOp.getReg(); 1481 unsigned EvenReg = MI->getOperand(0).getReg(); 1482 unsigned OddReg = MI->getOperand(1).getReg(); 1483 unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); 1484 unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); 1485 // ARM errata 602117: LDRD with base in list may result in incorrect base 1486 // register when interrupted or faulted. 1487 bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); 1488 if (!Errata602117 && 1489 ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) 1490 return false; 1491 1492 MachineBasicBlock::iterator NewBBI = MBBI; 1493 bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; 1494 bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; 1495 bool EvenDeadKill = isLd ? 1496 MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); 1497 bool EvenUndef = MI->getOperand(0).isUndef(); 1498 bool OddDeadKill = isLd ? 1499 MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); 1500 bool OddUndef = MI->getOperand(1).isUndef(); 1501 bool BaseKill = BaseOp.isKill(); 1502 bool BaseUndef = BaseOp.isUndef(); 1503 bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); 1504 bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); 1505 int OffImm = getMemoryOpOffset(MI); 1506 unsigned PredReg = 0; 1507 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 1508 1509 if (OddRegNum > EvenRegNum && OffImm == 0) { 1510 // Ascending register numbers and no offset. It's safe to change it to a 1511 // ldm or stm. 1512 unsigned NewOpc = (isLd) 1513 ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA) 1514 : (isT2 ? ARM::t2STMIA : ARM::STMIA); 1515 if (isLd) { 1516 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1517 .addReg(BaseReg, getKillRegState(BaseKill)) 1518 .addImm(Pred).addReg(PredReg) 1519 .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) 1520 .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); 1521 ++NumLDRD2LDM; 1522 } else { 1523 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) 1524 .addReg(BaseReg, getKillRegState(BaseKill)) 1525 .addImm(Pred).addReg(PredReg) 1526 .addReg(EvenReg, 1527 getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) 1528 .addReg(OddReg, 1529 getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); 1530 ++NumSTRD2STM; 1531 } 1532 NewBBI = std::prev(MBBI); 1533 } else { 1534 // Split into two instructions. 1535 unsigned NewOpc = (isLd) 1536 ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1537 : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1538 // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, 1539 // so adjust and use t2LDRi12 here for that. 1540 unsigned NewOpc2 = (isLd) 1541 ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) 1542 : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); 1543 DebugLoc dl = MBBI->getDebugLoc(); 1544 // If this is a load and base register is killed, it may have been 1545 // re-defed by the load, make sure the first load does not clobber it. 1546 if (isLd && 1547 (BaseKill || OffKill) && 1548 (TRI->regsOverlap(EvenReg, BaseReg))) { 1549 assert(!TRI->regsOverlap(OddReg, BaseReg)); 1550 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1551 OddReg, OddDeadKill, false, 1552 BaseReg, false, BaseUndef, false, OffUndef, 1553 Pred, PredReg, TII, isT2); 1554 NewBBI = std::prev(MBBI); 1555 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1556 EvenReg, EvenDeadKill, false, 1557 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1558 Pred, PredReg, TII, isT2); 1559 } else { 1560 if (OddReg == EvenReg && EvenDeadKill) { 1561 // If the two source operands are the same, the kill marker is 1562 // probably on the first one. e.g. 1563 // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 1564 EvenDeadKill = false; 1565 OddDeadKill = true; 1566 } 1567 // Never kill the base register in the first instruction. 1568 if (EvenReg == BaseReg) 1569 EvenDeadKill = false; 1570 InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, 1571 EvenReg, EvenDeadKill, EvenUndef, 1572 BaseReg, false, BaseUndef, false, OffUndef, 1573 Pred, PredReg, TII, isT2); 1574 NewBBI = std::prev(MBBI); 1575 InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, 1576 OddReg, OddDeadKill, OddUndef, 1577 BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, 1578 Pred, PredReg, TII, isT2); 1579 } 1580 if (isLd) 1581 ++NumLDRD2LDR; 1582 else 1583 ++NumSTRD2STR; 1584 } 1585 1586 MBB.erase(MI); 1587 MBBI = NewBBI; 1588 return true; 1589 } 1590 return false; 1591} 1592 1593/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 1594/// ops of the same base and incrementing offset into LDM / STM ops. 1595bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { 1596 unsigned NumMerges = 0; 1597 unsigned NumMemOps = 0; 1598 MemOpQueue MemOps; 1599 unsigned CurrBase = 0; 1600 int CurrOpc = -1; 1601 unsigned CurrSize = 0; 1602 ARMCC::CondCodes CurrPred = ARMCC::AL; 1603 unsigned CurrPredReg = 0; 1604 unsigned Position = 0; 1605 SmallVector<MachineBasicBlock::iterator,4> Merges; 1606 1607 RS->enterBasicBlock(&MBB); 1608 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 1609 while (MBBI != E) { 1610 if (FixInvalidRegPairOp(MBB, MBBI)) 1611 continue; 1612 1613 bool Advance = false; 1614 bool TryMerge = false; 1615 bool Clobber = false; 1616 1617 bool isMemOp = isMemoryOp(MBBI); 1618 if (isMemOp) { 1619 int Opcode = MBBI->getOpcode(); 1620 unsigned Size = getLSMultipleTransferSize(MBBI); 1621 const MachineOperand &MO = MBBI->getOperand(0); 1622 unsigned Reg = MO.getReg(); 1623 bool isKill = MO.isDef() ? false : MO.isKill(); 1624 unsigned Base = MBBI->getOperand(1).getReg(); 1625 unsigned PredReg = 0; 1626 ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); 1627 int Offset = getMemoryOpOffset(MBBI); 1628 // Watch out for: 1629 // r4 := ldr [r5] 1630 // r5 := ldr [r5, #4] 1631 // r6 := ldr [r5, #8] 1632 // 1633 // The second ldr has effectively broken the chain even though it 1634 // looks like the later ldr(s) use the same base register. Try to 1635 // merge the ldr's so far, including this one. But don't try to 1636 // combine the following ldr(s). 1637 Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); 1638 1639 // Watch out for: 1640 // r4 := ldr [r0, #8] 1641 // r4 := ldr [r0, #4] 1642 // 1643 // The optimization may reorder the second ldr in front of the first 1644 // ldr, which violates write after write(WAW) dependence. The same as 1645 // str. Try to merge inst(s) already in MemOps. 1646 bool Overlap = false; 1647 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { 1648 if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { 1649 Overlap = true; 1650 break; 1651 } 1652 } 1653 1654 if (CurrBase == 0 && !Clobber) { 1655 // Start of a new chain. 1656 CurrBase = Base; 1657 CurrOpc = Opcode; 1658 CurrSize = Size; 1659 CurrPred = Pred; 1660 CurrPredReg = PredReg; 1661 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); 1662 ++NumMemOps; 1663 Advance = true; 1664 } else if (!Overlap) { 1665 if (Clobber) { 1666 TryMerge = true; 1667 Advance = true; 1668 } 1669 1670 if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { 1671 // No need to match PredReg. 1672 // Continue adding to the queue. 1673 if (Offset > MemOps.back().Offset) { 1674 MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, 1675 Position, MBBI)); 1676 ++NumMemOps; 1677 Advance = true; 1678 } else { 1679 for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); 1680 I != E; ++I) { 1681 if (Offset < I->Offset) { 1682 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, 1683 Position, MBBI)); 1684 ++NumMemOps; 1685 Advance = true; 1686 break; 1687 } else if (Offset == I->Offset) { 1688 // Collision! This can't be merged! 1689 break; 1690 } 1691 } 1692 } 1693 } 1694 } 1695 } 1696 1697 if (MBBI->isDebugValue()) { 1698 ++MBBI; 1699 if (MBBI == E) 1700 // Reach the end of the block, try merging the memory instructions. 1701 TryMerge = true; 1702 } else if (Advance) { 1703 ++Position; 1704 ++MBBI; 1705 if (MBBI == E) 1706 // Reach the end of the block, try merging the memory instructions. 1707 TryMerge = true; 1708 } else { 1709 TryMerge = true; 1710 } 1711 1712 if (TryMerge) { 1713 if (NumMemOps > 1) { 1714 // Try to find a free register to use as a new base in case it's needed. 1715 // First advance to the instruction just before the start of the chain. 1716 AdvanceRS(MBB, MemOps); 1717 1718 // Find a scratch register. 1719 unsigned Scratch = 1720 RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); 1721 1722 // Process the load / store instructions. 1723 RS->forward(std::prev(MBBI)); 1724 1725 // Merge ops. 1726 Merges.clear(); 1727 MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, 1728 CurrPred, CurrPredReg, Scratch, MemOps, Merges); 1729 1730 // Try folding preceding/trailing base inc/dec into the generated 1731 // LDM/STM ops. 1732 for (unsigned i = 0, e = Merges.size(); i < e; ++i) 1733 if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) 1734 ++NumMerges; 1735 NumMerges += Merges.size(); 1736 1737 // Try folding preceding/trailing base inc/dec into those load/store 1738 // that were not merged to form LDM/STM ops. 1739 for (unsigned i = 0; i != NumMemOps; ++i) 1740 if (!MemOps[i].Merged) 1741 if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) 1742 ++NumMerges; 1743 1744 // RS may be pointing to an instruction that's deleted. 1745 RS->skipTo(std::prev(MBBI)); 1746 } else if (NumMemOps == 1) { 1747 // Try folding preceding/trailing base inc/dec into the single 1748 // load/store. 1749 if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { 1750 ++NumMerges; 1751 RS->forward(std::prev(MBBI)); 1752 } 1753 } 1754 1755 CurrBase = 0; 1756 CurrOpc = -1; 1757 CurrSize = 0; 1758 CurrPred = ARMCC::AL; 1759 CurrPredReg = 0; 1760 if (NumMemOps) { 1761 MemOps.clear(); 1762 NumMemOps = 0; 1763 } 1764 1765 // If iterator hasn't been advanced and this is not a memory op, skip it. 1766 // It can't start a new chain anyway. 1767 if (!Advance && !isMemOp && MBBI != E) { 1768 ++Position; 1769 ++MBBI; 1770 } 1771 } 1772 } 1773 return NumMerges > 0; 1774} 1775 1776/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops 1777/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it 1778/// directly restore the value of LR into pc. 1779/// ldmfd sp!, {..., lr} 1780/// bx lr 1781/// or 1782/// ldmfd sp!, {..., lr} 1783/// mov pc, lr 1784/// => 1785/// ldmfd sp!, {..., pc} 1786bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 1787 // Thumb1 LDM doesn't allow high registers. 1788 if (isThumb1) return false; 1789 if (MBB.empty()) return false; 1790 1791 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1792 if (MBBI != MBB.begin() && 1793 (MBBI->getOpcode() == ARM::BX_RET || 1794 MBBI->getOpcode() == ARM::tBX_RET || 1795 MBBI->getOpcode() == ARM::MOVPCLR)) { 1796 MachineInstr *PrevMI = std::prev(MBBI); 1797 unsigned Opcode = PrevMI->getOpcode(); 1798 if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD || 1799 Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD || 1800 Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { 1801 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 1802 if (MO.getReg() != ARM::LR) 1803 return false; 1804 unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET); 1805 assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) || 1806 Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!"); 1807 PrevMI->setDesc(TII->get(NewOpc)); 1808 MO.setReg(ARM::PC); 1809 PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI); 1810 MBB.erase(MBBI); 1811 return true; 1812 } 1813 } 1814 return false; 1815} 1816 1817bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1818 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1819 TL = STI->getTargetLowering(); 1820 AFI = Fn.getInfo<ARMFunctionInfo>(); 1821 TII = STI->getInstrInfo(); 1822 TRI = STI->getRegisterInfo(); 1823 RS = new RegScavenger(); 1824 isThumb2 = AFI->isThumb2Function(); 1825 isThumb1 = AFI->isThumbFunction() && !isThumb2; 1826 1827 bool Modified = false; 1828 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1829 ++MFI) { 1830 MachineBasicBlock &MBB = *MFI; 1831 Modified |= LoadStoreMultipleOpti(MBB); 1832 if (STI->hasV5TOps()) 1833 Modified |= MergeReturnIntoLDM(MBB); 1834 } 1835 1836 delete RS; 1837 return Modified; 1838} 1839 1840 1841/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move 1842/// load / stores from consecutive locations close to make it more 1843/// likely they will be combined later. 1844 1845namespace { 1846 struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 1847 static char ID; 1848 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} 1849 1850 const DataLayout *TD; 1851 const TargetInstrInfo *TII; 1852 const TargetRegisterInfo *TRI; 1853 const ARMSubtarget *STI; 1854 MachineRegisterInfo *MRI; 1855 MachineFunction *MF; 1856 1857 bool runOnMachineFunction(MachineFunction &Fn) override; 1858 1859 const char *getPassName() const override { 1860 return "ARM pre- register allocation load / store optimization pass"; 1861 } 1862 1863 private: 1864 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 1865 unsigned &NewOpc, unsigned &EvenReg, 1866 unsigned &OddReg, unsigned &BaseReg, 1867 int &Offset, 1868 unsigned &PredReg, ARMCC::CondCodes &Pred, 1869 bool &isT2); 1870 bool RescheduleOps(MachineBasicBlock *MBB, 1871 SmallVectorImpl<MachineInstr *> &Ops, 1872 unsigned Base, bool isLd, 1873 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 1874 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 1875 }; 1876 char ARMPreAllocLoadStoreOpt::ID = 0; 1877} 1878 1879bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 1880 TD = Fn.getTarget().getDataLayout(); 1881 STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 1882 TII = STI->getInstrInfo(); 1883 TRI = STI->getRegisterInfo(); 1884 MRI = &Fn.getRegInfo(); 1885 MF = &Fn; 1886 1887 bool Modified = false; 1888 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 1889 ++MFI) 1890 Modified |= RescheduleLoadStoreInstrs(MFI); 1891 1892 return Modified; 1893} 1894 1895static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, 1896 MachineBasicBlock::iterator I, 1897 MachineBasicBlock::iterator E, 1898 SmallPtrSetImpl<MachineInstr*> &MemOps, 1899 SmallSet<unsigned, 4> &MemRegs, 1900 const TargetRegisterInfo *TRI) { 1901 // Are there stores / loads / calls between them? 1902 // FIXME: This is overly conservative. We should make use of alias information 1903 // some day. 1904 SmallSet<unsigned, 4> AddedRegPressure; 1905 while (++I != E) { 1906 if (I->isDebugValue() || MemOps.count(&*I)) 1907 continue; 1908 if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects()) 1909 return false; 1910 if (isLd && I->mayStore()) 1911 return false; 1912 if (!isLd) { 1913 if (I->mayLoad()) 1914 return false; 1915 // It's not safe to move the first 'str' down. 1916 // str r1, [r0] 1917 // strh r5, [r0] 1918 // str r4, [r0, #+4] 1919 if (I->mayStore()) 1920 return false; 1921 } 1922 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1923 MachineOperand &MO = I->getOperand(j); 1924 if (!MO.isReg()) 1925 continue; 1926 unsigned Reg = MO.getReg(); 1927 if (MO.isDef() && TRI->regsOverlap(Reg, Base)) 1928 return false; 1929 if (Reg != Base && !MemRegs.count(Reg)) 1930 AddedRegPressure.insert(Reg); 1931 } 1932 } 1933 1934 // Estimate register pressure increase due to the transformation. 1935 if (MemRegs.size() <= 4) 1936 // Ok if we are moving small number of instructions. 1937 return true; 1938 return AddedRegPressure.size() <= MemRegs.size() * 2; 1939} 1940 1941 1942/// Copy Op0 and Op1 operands into a new array assigned to MI. 1943static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, 1944 MachineInstr *Op1) { 1945 assert(MI->memoperands_empty() && "expected a new machineinstr"); 1946 size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) 1947 + (Op1->memoperands_end() - Op1->memoperands_begin()); 1948 1949 MachineFunction *MF = MI->getParent()->getParent(); 1950 MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); 1951 MachineSDNode::mmo_iterator MemEnd = 1952 std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); 1953 MemEnd = 1954 std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); 1955 MI->setMemRefs(MemBegin, MemEnd); 1956} 1957 1958bool 1959ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1960 DebugLoc &dl, 1961 unsigned &NewOpc, unsigned &EvenReg, 1962 unsigned &OddReg, unsigned &BaseReg, 1963 int &Offset, unsigned &PredReg, 1964 ARMCC::CondCodes &Pred, 1965 bool &isT2) { 1966 // Make sure we're allowed to generate LDRD/STRD. 1967 if (!STI->hasV5TEOps()) 1968 return false; 1969 1970 // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD 1971 unsigned Scale = 1; 1972 unsigned Opcode = Op0->getOpcode(); 1973 if (Opcode == ARM::LDRi12) { 1974 NewOpc = ARM::LDRD; 1975 } else if (Opcode == ARM::STRi12) { 1976 NewOpc = ARM::STRD; 1977 } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { 1978 NewOpc = ARM::t2LDRDi8; 1979 Scale = 4; 1980 isT2 = true; 1981 } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) { 1982 NewOpc = ARM::t2STRDi8; 1983 Scale = 4; 1984 isT2 = true; 1985 } else { 1986 return false; 1987 } 1988 1989 // Make sure the base address satisfies i64 ld / st alignment requirement. 1990 // At the moment, we ignore the memoryoperand's value. 1991 // If we want to use AliasAnalysis, we should check it accordingly. 1992 if (!Op0->hasOneMemOperand() || 1993 (*Op0->memoperands_begin())->isVolatile()) 1994 return false; 1995 1996 unsigned Align = (*Op0->memoperands_begin())->getAlignment(); 1997 const Function *Func = MF->getFunction(); 1998 unsigned ReqAlign = STI->hasV6Ops() 1999 ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) 2000 : 8; // Pre-v6 need 8-byte align 2001 if (Align < ReqAlign) 2002 return false; 2003 2004 // Then make sure the immediate offset fits. 2005 int OffImm = getMemoryOpOffset(Op0); 2006 if (isT2) { 2007 int Limit = (1 << 8) * Scale; 2008 if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) 2009 return false; 2010 Offset = OffImm; 2011 } else { 2012 ARM_AM::AddrOpc AddSub = ARM_AM::add; 2013 if (OffImm < 0) { 2014 AddSub = ARM_AM::sub; 2015 OffImm = - OffImm; 2016 } 2017 int Limit = (1 << 8) * Scale; 2018 if (OffImm >= Limit || (OffImm & (Scale-1))) 2019 return false; 2020 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 2021 } 2022 EvenReg = Op0->getOperand(0).getReg(); 2023 OddReg = Op1->getOperand(0).getReg(); 2024 if (EvenReg == OddReg) 2025 return false; 2026 BaseReg = Op0->getOperand(1).getReg(); 2027 Pred = getInstrPredicate(Op0, PredReg); 2028 dl = Op0->getDebugLoc(); 2029 return true; 2030} 2031 2032bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 2033 SmallVectorImpl<MachineInstr *> &Ops, 2034 unsigned Base, bool isLd, 2035 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 2036 bool RetVal = false; 2037 2038 // Sort by offset (in reverse order). 2039 std::sort(Ops.begin(), Ops.end(), 2040 [](const MachineInstr *LHS, const MachineInstr *RHS) { 2041 int LOffset = getMemoryOpOffset(LHS); 2042 int ROffset = getMemoryOpOffset(RHS); 2043 assert(LHS == RHS || LOffset != ROffset); 2044 return LOffset > ROffset; 2045 }); 2046 2047 // The loads / stores of the same base are in order. Scan them from first to 2048 // last and check for the following: 2049 // 1. Any def of base. 2050 // 2. Any gaps. 2051 while (Ops.size() > 1) { 2052 unsigned FirstLoc = ~0U; 2053 unsigned LastLoc = 0; 2054 MachineInstr *FirstOp = nullptr; 2055 MachineInstr *LastOp = nullptr; 2056 int LastOffset = 0; 2057 unsigned LastOpcode = 0; 2058 unsigned LastBytes = 0; 2059 unsigned NumMove = 0; 2060 for (int i = Ops.size() - 1; i >= 0; --i) { 2061 MachineInstr *Op = Ops[i]; 2062 unsigned Loc = MI2LocMap[Op]; 2063 if (Loc <= FirstLoc) { 2064 FirstLoc = Loc; 2065 FirstOp = Op; 2066 } 2067 if (Loc >= LastLoc) { 2068 LastLoc = Loc; 2069 LastOp = Op; 2070 } 2071 2072 unsigned LSMOpcode 2073 = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); 2074 if (LastOpcode && LSMOpcode != LastOpcode) 2075 break; 2076 2077 int Offset = getMemoryOpOffset(Op); 2078 unsigned Bytes = getLSMultipleTransferSize(Op); 2079 if (LastBytes) { 2080 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 2081 break; 2082 } 2083 LastOffset = Offset; 2084 LastBytes = Bytes; 2085 LastOpcode = LSMOpcode; 2086 if (++NumMove == 8) // FIXME: Tune this limit. 2087 break; 2088 } 2089 2090 if (NumMove <= 1) 2091 Ops.pop_back(); 2092 else { 2093 SmallPtrSet<MachineInstr*, 4> MemOps; 2094 SmallSet<unsigned, 4> MemRegs; 2095 for (int i = NumMove-1; i >= 0; --i) { 2096 MemOps.insert(Ops[i]); 2097 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 2098 } 2099 2100 // Be conservative, if the instructions are too far apart, don't 2101 // move them. We want to limit the increase of register pressure. 2102 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 2103 if (DoMove) 2104 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 2105 MemOps, MemRegs, TRI); 2106 if (!DoMove) { 2107 for (unsigned i = 0; i != NumMove; ++i) 2108 Ops.pop_back(); 2109 } else { 2110 // This is the new location for the loads / stores. 2111 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 2112 while (InsertPos != MBB->end() 2113 && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) 2114 ++InsertPos; 2115 2116 // If we are moving a pair of loads / stores, see if it makes sense 2117 // to try to allocate a pair of registers that can form register pairs. 2118 MachineInstr *Op0 = Ops.back(); 2119 MachineInstr *Op1 = Ops[Ops.size()-2]; 2120 unsigned EvenReg = 0, OddReg = 0; 2121 unsigned BaseReg = 0, PredReg = 0; 2122 ARMCC::CondCodes Pred = ARMCC::AL; 2123 bool isT2 = false; 2124 unsigned NewOpc = 0; 2125 int Offset = 0; 2126 DebugLoc dl; 2127 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 2128 EvenReg, OddReg, BaseReg, 2129 Offset, PredReg, Pred, isT2)) { 2130 Ops.pop_back(); 2131 Ops.pop_back(); 2132 2133 const MCInstrDesc &MCID = TII->get(NewOpc); 2134 const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); 2135 MRI->constrainRegClass(EvenReg, TRC); 2136 MRI->constrainRegClass(OddReg, TRC); 2137 2138 // Form the pair instruction. 2139 if (isLd) { 2140 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2141 .addReg(EvenReg, RegState::Define) 2142 .addReg(OddReg, RegState::Define) 2143 .addReg(BaseReg); 2144 // FIXME: We're converting from LDRi12 to an insn that still 2145 // uses addrmode2, so we need an explicit offset reg. It should 2146 // always by reg0 since we're transforming LDRi12s. 2147 if (!isT2) 2148 MIB.addReg(0); 2149 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2150 concatenateMemOperands(MIB, Op0, Op1); 2151 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2152 ++NumLDRDFormed; 2153 } else { 2154 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) 2155 .addReg(EvenReg) 2156 .addReg(OddReg) 2157 .addReg(BaseReg); 2158 // FIXME: We're converting from LDRi12 to an insn that still 2159 // uses addrmode2, so we need an explicit offset reg. It should 2160 // always by reg0 since we're transforming STRi12s. 2161 if (!isT2) 2162 MIB.addReg(0); 2163 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 2164 concatenateMemOperands(MIB, Op0, Op1); 2165 DEBUG(dbgs() << "Formed " << *MIB << "\n"); 2166 ++NumSTRDFormed; 2167 } 2168 MBB->erase(Op0); 2169 MBB->erase(Op1); 2170 2171 // Add register allocation hints to form register pairs. 2172 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 2173 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 2174 } else { 2175 for (unsigned i = 0; i != NumMove; ++i) { 2176 MachineInstr *Op = Ops.back(); 2177 Ops.pop_back(); 2178 MBB->splice(InsertPos, MBB, Op); 2179 } 2180 } 2181 2182 NumLdStMoved += NumMove; 2183 RetVal = true; 2184 } 2185 } 2186 } 2187 2188 return RetVal; 2189} 2190 2191bool 2192ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 2193 bool RetVal = false; 2194 2195 DenseMap<MachineInstr*, unsigned> MI2LocMap; 2196 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 2197 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 2198 SmallVector<unsigned, 4> LdBases; 2199 SmallVector<unsigned, 4> StBases; 2200 2201 unsigned Loc = 0; 2202 MachineBasicBlock::iterator MBBI = MBB->begin(); 2203 MachineBasicBlock::iterator E = MBB->end(); 2204 while (MBBI != E) { 2205 for (; MBBI != E; ++MBBI) { 2206 MachineInstr *MI = MBBI; 2207 if (MI->isCall() || MI->isTerminator()) { 2208 // Stop at barriers. 2209 ++MBBI; 2210 break; 2211 } 2212 2213 if (!MI->isDebugValue()) 2214 MI2LocMap[MI] = ++Loc; 2215 2216 if (!isMemoryOp(MI)) 2217 continue; 2218 unsigned PredReg = 0; 2219 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 2220 continue; 2221 2222 int Opc = MI->getOpcode(); 2223 bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; 2224 unsigned Base = MI->getOperand(1).getReg(); 2225 int Offset = getMemoryOpOffset(MI); 2226 2227 bool StopHere = false; 2228 if (isLd) { 2229 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2230 Base2LdsMap.find(Base); 2231 if (BI != Base2LdsMap.end()) { 2232 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2233 if (Offset == getMemoryOpOffset(BI->second[i])) { 2234 StopHere = true; 2235 break; 2236 } 2237 } 2238 if (!StopHere) 2239 BI->second.push_back(MI); 2240 } else { 2241 Base2LdsMap[Base].push_back(MI); 2242 LdBases.push_back(Base); 2243 } 2244 } else { 2245 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 2246 Base2StsMap.find(Base); 2247 if (BI != Base2StsMap.end()) { 2248 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 2249 if (Offset == getMemoryOpOffset(BI->second[i])) { 2250 StopHere = true; 2251 break; 2252 } 2253 } 2254 if (!StopHere) 2255 BI->second.push_back(MI); 2256 } else { 2257 Base2StsMap[Base].push_back(MI); 2258 StBases.push_back(Base); 2259 } 2260 } 2261 2262 if (StopHere) { 2263 // Found a duplicate (a base+offset combination that's seen earlier). 2264 // Backtrack. 2265 --Loc; 2266 break; 2267 } 2268 } 2269 2270 // Re-schedule loads. 2271 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 2272 unsigned Base = LdBases[i]; 2273 SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base]; 2274 if (Lds.size() > 1) 2275 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 2276 } 2277 2278 // Re-schedule stores. 2279 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 2280 unsigned Base = StBases[i]; 2281 SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base]; 2282 if (Sts.size() > 1) 2283 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 2284 } 2285 2286 if (MBBI != E) { 2287 Base2LdsMap.clear(); 2288 Base2StsMap.clear(); 2289 LdBases.clear(); 2290 StBases.clear(); 2291 } 2292 } 2293 2294 return RetVal; 2295} 2296 2297 2298/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 2299/// optimization pass. 2300FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 2301 if (PreAlloc) 2302 return new ARMPreAllocLoadStoreOpt(); 2303 return new ARMLoadStoreOpt(); 2304} 2305