// ARMLoadStoreOptimizer.cpp revision 08c66642d70879cc479b502c048df4f5fdeaefae
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-ldst-opt" 16#include "ARM.h" 17#include "ARMBaseInstrInfo.h" 18#include "ARMMachineFunctionInfo.h" 19#include "ARMRegisterInfo.h" 20#include "MCTargetDesc/ARMAddressingModes.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/Function.h" 23#include "llvm/CodeGen/MachineBasicBlock.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstr.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/MachineRegisterInfo.h" 28#include "llvm/CodeGen/RegisterScavenging.h" 29#include "llvm/CodeGen/SelectionDAGNodes.h" 30#include "llvm/Target/TargetData.h" 31#include "llvm/Target/TargetInstrInfo.h" 32#include "llvm/Target/TargetMachine.h" 33#include "llvm/Target/TargetRegisterInfo.h" 34#include "llvm/Support/ErrorHandling.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/ADT/DenseMap.h" 38#include "llvm/ADT/STLExtras.h" 39#include "llvm/ADT/SmallPtrSet.h" 40#include "llvm/ADT/SmallSet.h" 41#include "llvm/ADT/SmallVector.h" 42#include "llvm/ADT/Statistic.h" 43using namespace llvm; 44 45STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 46STATISTIC(NumSTMGened , "Number of stm instructions generated"); 47STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 48STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 49STATISTIC(NumLdStMoved, "Number of load / store 
instructions moved"); 50STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 51STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); 52STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 53STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 54STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 55STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 56 57/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 58/// load / store instructions to form ldm / stm instructions. 59 60namespace { 61 struct ARMLoadStoreOpt : public MachineFunctionPass { 62 static char ID; 63 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 64 65 const TargetInstrInfo *TII; 66 const TargetRegisterInfo *TRI; 67 const ARMSubtarget *STI; 68 ARMFunctionInfo *AFI; 69 RegScavenger *RS; 70 bool isThumb2; 71 72 virtual bool runOnMachineFunction(MachineFunction &Fn); 73 74 virtual const char *getPassName() const { 75 return "ARM load / store optimization pass"; 76 } 77 78 private: 79 struct MemOpQueueEntry { 80 int Offset; 81 unsigned Reg; 82 bool isKill; 83 unsigned Position; 84 MachineBasicBlock::iterator MBBI; 85 bool Merged; 86 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 87 MachineBasicBlock::iterator i) 88 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 89 }; 90 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 91 typedef MemOpQueue::iterator MemOpQueueIter; 92 93 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 94 int Offset, unsigned Base, bool BaseKill, int Opcode, 95 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 96 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); 97 void MergeOpsUpdate(MachineBasicBlock &MBB, 98 MemOpQueue &MemOps, 99 unsigned memOpsBegin, 100 unsigned memOpsEnd, 101 unsigned insertAfter, 102 int Offset, 103 unsigned Base, 104 bool BaseKill, 105 int 
Opcode, 106 ARMCC::CondCodes Pred, 107 unsigned PredReg, 108 unsigned Scratch, 109 DebugLoc dl, 110 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 111 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 112 int Opcode, unsigned Size, 113 ARMCC::CondCodes Pred, unsigned PredReg, 114 unsigned Scratch, MemOpQueue &MemOps, 115 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 116 117 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 118 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 119 MachineBasicBlock::iterator &MBBI); 120 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 121 MachineBasicBlock::iterator MBBI, 122 const TargetInstrInfo *TII, 123 bool &Advance, 124 MachineBasicBlock::iterator &I); 125 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 126 MachineBasicBlock::iterator MBBI, 127 bool &Advance, 128 MachineBasicBlock::iterator &I); 129 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 130 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 131 }; 132 char ARMLoadStoreOpt::ID = 0; 133} 134 135static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { 136 switch (Opcode) { 137 default: llvm_unreachable("Unhandled opcode!"); 138 case ARM::LDRi12: 139 ++NumLDMGened; 140 switch (Mode) { 141 default: llvm_unreachable("Unhandled submode!"); 142 case ARM_AM::ia: return ARM::LDMIA; 143 case ARM_AM::da: return ARM::LDMDA; 144 case ARM_AM::db: return ARM::LDMDB; 145 case ARM_AM::ib: return ARM::LDMIB; 146 } 147 break; 148 case ARM::STRi12: 149 ++NumSTMGened; 150 switch (Mode) { 151 default: llvm_unreachable("Unhandled submode!"); 152 case ARM_AM::ia: return ARM::STMIA; 153 case ARM_AM::da: return ARM::STMDA; 154 case ARM_AM::db: return ARM::STMDB; 155 case ARM_AM::ib: return ARM::STMIB; 156 } 157 break; 158 case ARM::t2LDRi8: 159 case ARM::t2LDRi12: 160 ++NumLDMGened; 161 switch (Mode) { 162 default: llvm_unreachable("Unhandled submode!"); 163 case ARM_AM::ia: return ARM::t2LDMIA; 164 case 
ARM_AM::db: return ARM::t2LDMDB; 165 } 166 break; 167 case ARM::t2STRi8: 168 case ARM::t2STRi12: 169 ++NumSTMGened; 170 switch (Mode) { 171 default: llvm_unreachable("Unhandled submode!"); 172 case ARM_AM::ia: return ARM::t2STMIA; 173 case ARM_AM::db: return ARM::t2STMDB; 174 } 175 break; 176 case ARM::VLDRS: 177 ++NumVLDMGened; 178 switch (Mode) { 179 default: llvm_unreachable("Unhandled submode!"); 180 case ARM_AM::ia: return ARM::VLDMSIA; 181 case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. 182 } 183 break; 184 case ARM::VSTRS: 185 ++NumVSTMGened; 186 switch (Mode) { 187 default: llvm_unreachable("Unhandled submode!"); 188 case ARM_AM::ia: return ARM::VSTMSIA; 189 case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. 190 } 191 break; 192 case ARM::VLDRD: 193 ++NumVLDMGened; 194 switch (Mode) { 195 default: llvm_unreachable("Unhandled submode!"); 196 case ARM_AM::ia: return ARM::VLDMDIA; 197 case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 198 } 199 break; 200 case ARM::VSTRD: 201 ++NumVSTMGened; 202 switch (Mode) { 203 default: llvm_unreachable("Unhandled submode!"); 204 case ARM_AM::ia: return ARM::VSTMDIA; 205 case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 
206 } 207 break; 208 } 209 210 return 0; 211} 212 213namespace llvm { 214 namespace ARM_AM { 215 216AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 217 switch (Opcode) { 218 default: llvm_unreachable("Unhandled opcode!"); 219 case ARM::LDMIA_RET: 220 case ARM::LDMIA: 221 case ARM::LDMIA_UPD: 222 case ARM::STMIA: 223 case ARM::STMIA_UPD: 224 case ARM::t2LDMIA_RET: 225 case ARM::t2LDMIA: 226 case ARM::t2LDMIA_UPD: 227 case ARM::t2STMIA: 228 case ARM::t2STMIA_UPD: 229 case ARM::VLDMSIA: 230 case ARM::VLDMSIA_UPD: 231 case ARM::VSTMSIA: 232 case ARM::VSTMSIA_UPD: 233 case ARM::VLDMDIA: 234 case ARM::VLDMDIA_UPD: 235 case ARM::VSTMDIA: 236 case ARM::VSTMDIA_UPD: 237 return ARM_AM::ia; 238 239 case ARM::LDMDA: 240 case ARM::LDMDA_UPD: 241 case ARM::STMDA: 242 case ARM::STMDA_UPD: 243 return ARM_AM::da; 244 245 case ARM::LDMDB: 246 case ARM::LDMDB_UPD: 247 case ARM::STMDB: 248 case ARM::STMDB_UPD: 249 case ARM::t2LDMDB: 250 case ARM::t2LDMDB_UPD: 251 case ARM::t2STMDB: 252 case ARM::t2STMDB_UPD: 253 case ARM::VLDMSDB_UPD: 254 case ARM::VSTMSDB_UPD: 255 case ARM::VLDMDDB_UPD: 256 case ARM::VSTMDDB_UPD: 257 return ARM_AM::db; 258 259 case ARM::LDMIB: 260 case ARM::LDMIB_UPD: 261 case ARM::STMIB: 262 case ARM::STMIB_UPD: 263 return ARM_AM::ib; 264 } 265 266 return ARM_AM::bad_am_submode; 267} 268 269 } // end namespace ARM_AM 270} // end namespace llvm 271 272static bool isT2i32Load(unsigned Opc) { 273 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 274} 275 276static bool isi32Load(unsigned Opc) { 277 return Opc == ARM::LDRi12 || isT2i32Load(Opc); 278} 279 280static bool isT2i32Store(unsigned Opc) { 281 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 282} 283 284static bool isi32Store(unsigned Opc) { 285 return Opc == ARM::STRi12 || isT2i32Store(Opc); 286} 287 288/// MergeOps - Create and insert a LDM or STM with Base as base register and 289/// registers in Regs as the register operands that would be loaded / stored. 
290/// It returns true if the transformation is done. 291bool 292ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 293 MachineBasicBlock::iterator MBBI, 294 int Offset, unsigned Base, bool BaseKill, 295 int Opcode, ARMCC::CondCodes Pred, 296 unsigned PredReg, unsigned Scratch, DebugLoc dl, 297 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 298 // Only a single register to load / store. Don't bother. 299 unsigned NumRegs = Regs.size(); 300 if (NumRegs <= 1) 301 return false; 302 303 ARM_AM::AMSubMode Mode = ARM_AM::ia; 304 // VFP and Thumb2 do not support IB or DA modes. 305 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 306 bool haveIBAndDA = isNotVFP && !isThumb2; 307 if (Offset == 4 && haveIBAndDA) 308 Mode = ARM_AM::ib; 309 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) 310 Mode = ARM_AM::da; 311 else if (Offset == -4 * (int)NumRegs && isNotVFP) 312 // VLDM/VSTM do not support DB mode without also updating the base reg. 313 Mode = ARM_AM::db; 314 else if (Offset != 0) { 315 // Check if this is a supported opcode before we insert instructions to 316 // calculate a new base register. 317 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 318 319 // If starting offset isn't zero, insert a MI to materialize a new base. 320 // But only do so if it is cost effective, i.e. merging more than two 321 // loads / stores. 322 if (NumRegs <= 2) 323 return false; 324 325 unsigned NewBase; 326 if (isi32Load(Opcode)) 327 // If it is a load, then just use one of the destination register to 328 // use as the new base. 329 NewBase = Regs[NumRegs-1].first; 330 else { 331 // Use the scratch register to use as a new base. 332 NewBase = Scratch; 333 if (NewBase == 0) 334 return false; 335 } 336 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; 337 if (Offset < 0) { 338 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; 339 Offset = - Offset; 340 } 341 int ImmedOffset = isThumb2 342 ? 
ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); 343 if (ImmedOffset == -1) 344 // FIXME: Try t2ADDri12 or t2SUBri12? 345 return false; // Probably not worth it then. 346 347 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 348 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 349 .addImm(Pred).addReg(PredReg).addReg(0); 350 Base = NewBase; 351 BaseKill = true; // New base is always killed right its use. 352 } 353 354 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 355 Opcode == ARM::VLDRD); 356 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 357 if (!Opcode) return false; 358 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 359 .addReg(Base, getKillRegState(BaseKill)) 360 .addImm(Pred).addReg(PredReg); 361 for (unsigned i = 0; i != NumRegs; ++i) 362 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 363 | getKillRegState(Regs[i].second)); 364 365 return true; 366} 367 368// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 369// success. 370void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 371 MemOpQueue &memOps, 372 unsigned memOpsBegin, unsigned memOpsEnd, 373 unsigned insertAfter, int Offset, 374 unsigned Base, bool BaseKill, 375 int Opcode, 376 ARMCC::CondCodes Pred, unsigned PredReg, 377 unsigned Scratch, 378 DebugLoc dl, 379 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 380 // First calculate which of the registers should be killed by the merged 381 // instruction. 
382 const unsigned insertPos = memOps[insertAfter].Position; 383 SmallSet<unsigned, 4> KilledRegs; 384 DenseMap<unsigned, unsigned> Killer; 385 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 386 if (i == memOpsBegin) { 387 i = memOpsEnd; 388 if (i == e) 389 break; 390 } 391 if (memOps[i].Position < insertPos && memOps[i].isKill) { 392 unsigned Reg = memOps[i].Reg; 393 KilledRegs.insert(Reg); 394 Killer[Reg] = i; 395 } 396 } 397 398 SmallVector<std::pair<unsigned, bool>, 8> Regs; 399 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 400 unsigned Reg = memOps[i].Reg; 401 // If we are inserting the merged operation after an operation that 402 // uses the same register, make sure to transfer any kill flag. 403 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 404 Regs.push_back(std::make_pair(Reg, isKill)); 405 } 406 407 // Try to do the merge. 408 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 409 ++Loc; 410 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 411 Pred, PredReg, Scratch, dl, Regs)) 412 return; 413 414 // Merge succeeded, update records. 415 Merges.push_back(prior(Loc)); 416 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 417 // Remove kill flags from any memops that come before insertPos. 418 if (Regs[i-memOpsBegin].second) { 419 unsigned Reg = Regs[i-memOpsBegin].first; 420 if (KilledRegs.count(Reg)) { 421 unsigned j = Killer[Reg]; 422 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 423 assert(Idx >= 0 && "Cannot find killing operand"); 424 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 425 memOps[j].isKill = false; 426 } 427 memOps[i].isKill = true; 428 } 429 MBB.erase(memOps[i].MBBI); 430 // Update this memop to refer to the merged instruction. 431 // We may need to move kill flags again. 
432 memOps[i].Merged = true; 433 memOps[i].MBBI = Merges.back(); 434 memOps[i].Position = insertPos; 435 } 436} 437 438/// MergeLDR_STR - Merge a number of load / store instructions into one or more 439/// load / store multiple instructions. 440void 441ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 442 unsigned Base, int Opcode, unsigned Size, 443 ARMCC::CondCodes Pred, unsigned PredReg, 444 unsigned Scratch, MemOpQueue &MemOps, 445 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 446 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 447 int Offset = MemOps[SIndex].Offset; 448 int SOffset = Offset; 449 unsigned insertAfter = SIndex; 450 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 451 DebugLoc dl = Loc->getDebugLoc(); 452 const MachineOperand &PMO = Loc->getOperand(0); 453 unsigned PReg = PMO.getReg(); 454 unsigned PRegNum = PMO.isUndef() ? UINT_MAX 455 : getARMRegisterNumbering(PReg); 456 unsigned Count = 1; 457 unsigned Limit = ~0U; 458 459 // vldm / vstm limit are 32 for S variants, 16 for D variants. 460 461 switch (Opcode) { 462 default: break; 463 case ARM::VSTRS: 464 Limit = 32; 465 break; 466 case ARM::VSTRD: 467 Limit = 16; 468 break; 469 case ARM::VLDRD: 470 Limit = 16; 471 break; 472 case ARM::VLDRS: 473 Limit = 32; 474 break; 475 } 476 477 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 478 int NewOffset = MemOps[i].Offset; 479 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 480 unsigned Reg = MO.getReg(); 481 unsigned RegNum = MO.isUndef() ? UINT_MAX 482 : getARMRegisterNumbering(Reg); 483 // Register numbers must be in ascending order. For VFP / NEON load and 484 // store multiples, the registers must also be consecutive and within the 485 // limit on the number of registers per instruction. 
486 if (Reg != ARM::SP && 487 NewOffset == Offset + (int)Size && 488 ((isNotVFP && RegNum > PRegNum) || 489 ((Count < Limit) && RegNum == PRegNum+1))) { 490 Offset += Size; 491 PRegNum = RegNum; 492 ++Count; 493 } else { 494 // Can't merge this in. Try merge the earlier ones first. 495 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 496 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 497 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 498 MemOps, Merges); 499 return; 500 } 501 502 if (MemOps[i].Position > MemOps[insertAfter].Position) 503 insertAfter = i; 504 } 505 506 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 507 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 508 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 509 return; 510} 511 512static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 513 unsigned Bytes, unsigned Limit, 514 ARMCC::CondCodes Pred, unsigned PredReg){ 515 unsigned MyPredReg = 0; 516 if (!MI) 517 return false; 518 if (MI->getOpcode() != ARM::t2SUBri && 519 MI->getOpcode() != ARM::tSUBspi && 520 MI->getOpcode() != ARM::SUBri) 521 return false; 522 523 // Make sure the offset fits in 8 bits. 524 if (Bytes == 0 || (Limit && Bytes >= Limit)) 525 return false; 526 527 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 
4 : 1; // FIXME 528 return (MI->getOperand(0).getReg() == Base && 529 MI->getOperand(1).getReg() == Base && 530 (MI->getOperand(2).getImm()*Scale) == Bytes && 531 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 532 MyPredReg == PredReg); 533} 534 535static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 536 unsigned Bytes, unsigned Limit, 537 ARMCC::CondCodes Pred, unsigned PredReg){ 538 unsigned MyPredReg = 0; 539 if (!MI) 540 return false; 541 if (MI->getOpcode() != ARM::t2ADDri && 542 MI->getOpcode() != ARM::tADDspi && 543 MI->getOpcode() != ARM::ADDri) 544 return false; 545 546 if (Bytes == 0 || (Limit && Bytes >= Limit)) 547 // Make sure the offset fits in 8 bits. 548 return false; 549 550 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME 551 return (MI->getOperand(0).getReg() == Base && 552 MI->getOperand(1).getReg() == Base && 553 (MI->getOperand(2).getImm()*Scale) == Bytes && 554 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 555 MyPredReg == PredReg); 556} 557 558static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 559 switch (MI->getOpcode()) { 560 default: return 0; 561 case ARM::LDRi12: 562 case ARM::STRi12: 563 case ARM::t2LDRi8: 564 case ARM::t2LDRi12: 565 case ARM::t2STRi8: 566 case ARM::t2STRi12: 567 case ARM::VLDRS: 568 case ARM::VSTRS: 569 return 4; 570 case ARM::VLDRD: 571 case ARM::VSTRD: 572 return 8; 573 case ARM::LDMIA: 574 case ARM::LDMDA: 575 case ARM::LDMDB: 576 case ARM::LDMIB: 577 case ARM::STMIA: 578 case ARM::STMDA: 579 case ARM::STMDB: 580 case ARM::STMIB: 581 case ARM::t2LDMIA: 582 case ARM::t2LDMDB: 583 case ARM::t2STMIA: 584 case ARM::t2STMDB: 585 case ARM::VLDMSIA: 586 case ARM::VSTMSIA: 587 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 588 case ARM::VLDMDIA: 589 case ARM::VSTMDIA: 590 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 591 } 592} 593 594static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 595 
ARM_AM::AMSubMode Mode) { 596 switch (Opc) { 597 default: llvm_unreachable("Unhandled opcode!"); 598 case ARM::LDMIA: 599 case ARM::LDMDA: 600 case ARM::LDMDB: 601 case ARM::LDMIB: 602 switch (Mode) { 603 default: llvm_unreachable("Unhandled submode!"); 604 case ARM_AM::ia: return ARM::LDMIA_UPD; 605 case ARM_AM::ib: return ARM::LDMIB_UPD; 606 case ARM_AM::da: return ARM::LDMDA_UPD; 607 case ARM_AM::db: return ARM::LDMDB_UPD; 608 } 609 break; 610 case ARM::STMIA: 611 case ARM::STMDA: 612 case ARM::STMDB: 613 case ARM::STMIB: 614 switch (Mode) { 615 default: llvm_unreachable("Unhandled submode!"); 616 case ARM_AM::ia: return ARM::STMIA_UPD; 617 case ARM_AM::ib: return ARM::STMIB_UPD; 618 case ARM_AM::da: return ARM::STMDA_UPD; 619 case ARM_AM::db: return ARM::STMDB_UPD; 620 } 621 break; 622 case ARM::t2LDMIA: 623 case ARM::t2LDMDB: 624 switch (Mode) { 625 default: llvm_unreachable("Unhandled submode!"); 626 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 627 case ARM_AM::db: return ARM::t2LDMDB_UPD; 628 } 629 break; 630 case ARM::t2STMIA: 631 case ARM::t2STMDB: 632 switch (Mode) { 633 default: llvm_unreachable("Unhandled submode!"); 634 case ARM_AM::ia: return ARM::t2STMIA_UPD; 635 case ARM_AM::db: return ARM::t2STMDB_UPD; 636 } 637 break; 638 case ARM::VLDMSIA: 639 switch (Mode) { 640 default: llvm_unreachable("Unhandled submode!"); 641 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 642 case ARM_AM::db: return ARM::VLDMSDB_UPD; 643 } 644 break; 645 case ARM::VLDMDIA: 646 switch (Mode) { 647 default: llvm_unreachable("Unhandled submode!"); 648 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 649 case ARM_AM::db: return ARM::VLDMDDB_UPD; 650 } 651 break; 652 case ARM::VSTMSIA: 653 switch (Mode) { 654 default: llvm_unreachable("Unhandled submode!"); 655 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 656 case ARM_AM::db: return ARM::VSTMSDB_UPD; 657 } 658 break; 659 case ARM::VSTMDIA: 660 switch (Mode) { 661 default: llvm_unreachable("Unhandled submode!"); 662 case ARM_AM::ia: return 
ARM::VSTMDIA_UPD; 663 case ARM_AM::db: return ARM::VSTMDDB_UPD; 664 } 665 break; 666 } 667 668 return 0; 669} 670 671/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 672/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 673/// 674/// stmia rn, <ra, rb, rc> 675/// rn := rn + 4 * 3; 676/// => 677/// stmia rn!, <ra, rb, rc> 678/// 679/// rn := rn - 4 * 3; 680/// ldmia rn, <ra, rb, rc> 681/// => 682/// ldmdb rn!, <ra, rb, rc> 683bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 684 MachineBasicBlock::iterator MBBI, 685 bool &Advance, 686 MachineBasicBlock::iterator &I) { 687 MachineInstr *MI = MBBI; 688 unsigned Base = MI->getOperand(0).getReg(); 689 bool BaseKill = MI->getOperand(0).isKill(); 690 unsigned Bytes = getLSMultipleTransferSize(MI); 691 unsigned PredReg = 0; 692 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 693 int Opcode = MI->getOpcode(); 694 DebugLoc dl = MI->getDebugLoc(); 695 696 // Can't use an updating ld/st if the base register is also a dest 697 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 698 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 699 if (MI->getOperand(i).getReg() == Base) 700 return false; 701 702 bool DoMerge = false; 703 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 704 705 // Try merging with the previous instruction. 
706 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 707 if (MBBI != BeginMBBI) { 708 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 709 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 710 --PrevMBBI; 711 if (Mode == ARM_AM::ia && 712 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 713 Mode = ARM_AM::db; 714 DoMerge = true; 715 } else if (Mode == ARM_AM::ib && 716 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 717 Mode = ARM_AM::da; 718 DoMerge = true; 719 } 720 if (DoMerge) 721 MBB.erase(PrevMBBI); 722 } 723 724 // Try merging with the next instruction. 725 MachineBasicBlock::iterator EndMBBI = MBB.end(); 726 if (!DoMerge && MBBI != EndMBBI) { 727 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 728 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 729 ++NextMBBI; 730 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 731 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 732 DoMerge = true; 733 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 734 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 735 DoMerge = true; 736 } 737 if (DoMerge) { 738 if (NextMBBI == I) { 739 Advance = true; 740 ++I; 741 } 742 MBB.erase(NextMBBI); 743 } 744 } 745 746 if (!DoMerge) 747 return false; 748 749 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 750 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 751 .addReg(Base, getDefRegState(true)) // WB base register 752 .addReg(Base, getKillRegState(BaseKill)) 753 .addImm(Pred).addReg(PredReg); 754 755 // Transfer the rest of operands. 756 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 757 MIB.addOperand(MI->getOperand(OpNum)); 758 759 // Transfer memoperands. 
760 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 761 762 MBB.erase(MBBI); 763 return true; 764} 765 766static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 767 ARM_AM::AddrOpc Mode) { 768 switch (Opc) { 769 case ARM::LDRi12: 770 return ARM::LDR_PRE_IMM; 771 case ARM::STRi12: 772 return ARM::STR_PRE_IMM; 773 case ARM::VLDRS: 774 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 775 case ARM::VLDRD: 776 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 777 case ARM::VSTRS: 778 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 779 case ARM::VSTRD: 780 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 781 case ARM::t2LDRi8: 782 case ARM::t2LDRi12: 783 return ARM::t2LDR_PRE; 784 case ARM::t2STRi8: 785 case ARM::t2STRi12: 786 return ARM::t2STR_PRE; 787 default: llvm_unreachable("Unhandled opcode!"); 788 } 789 return 0; 790} 791 792static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 793 ARM_AM::AddrOpc Mode) { 794 switch (Opc) { 795 case ARM::LDRi12: 796 return ARM::LDR_POST_IMM; 797 case ARM::STRi12: 798 return ARM::STR_POST_IMM; 799 case ARM::VLDRS: 800 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 801 case ARM::VLDRD: 802 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 803 case ARM::VSTRS: 804 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 805 case ARM::VSTRD: 806 return Mode == ARM_AM::add ? 
ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 807 case ARM::t2LDRi8: 808 case ARM::t2LDRi12: 809 return ARM::t2LDR_POST; 810 case ARM::t2STRi8: 811 case ARM::t2STRi12: 812 return ARM::t2STR_POST; 813 default: llvm_unreachable("Unhandled opcode!"); 814 } 815 return 0; 816} 817 818/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 819/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 820bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 821 MachineBasicBlock::iterator MBBI, 822 const TargetInstrInfo *TII, 823 bool &Advance, 824 MachineBasicBlock::iterator &I) { 825 MachineInstr *MI = MBBI; 826 unsigned Base = MI->getOperand(1).getReg(); 827 bool BaseKill = MI->getOperand(1).isKill(); 828 unsigned Bytes = getLSMultipleTransferSize(MI); 829 int Opcode = MI->getOpcode(); 830 DebugLoc dl = MI->getDebugLoc(); 831 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 832 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 833 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 834 if (isi32Load(Opcode) || isi32Store(Opcode)) 835 if (MI->getOperand(2).getImm() != 0) 836 return false; 837 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 838 return false; 839 840 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 841 // Can't do the merge if the destination register is the same as the would-be 842 // writeback register. 843 if (isLd && MI->getOperand(0).getReg() == Base) 844 return false; 845 846 unsigned PredReg = 0; 847 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 848 bool DoMerge = false; 849 ARM_AM::AddrOpc AddSub = ARM_AM::add; 850 unsigned NewOpc = 0; 851 // AM2 - 12 bits, thumb2 - 8 bits. 852 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 853 854 // Try merging with the previous instruction. 
855 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 856 if (MBBI != BeginMBBI) { 857 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 858 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 859 --PrevMBBI; 860 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 861 DoMerge = true; 862 AddSub = ARM_AM::sub; 863 } else if (!isAM5 && 864 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 865 DoMerge = true; 866 } 867 if (DoMerge) { 868 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 869 MBB.erase(PrevMBBI); 870 } 871 } 872 873 // Try merging with the next instruction. 874 MachineBasicBlock::iterator EndMBBI = MBB.end(); 875 if (!DoMerge && MBBI != EndMBBI) { 876 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 877 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 878 ++NextMBBI; 879 if (!isAM5 && 880 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 881 DoMerge = true; 882 AddSub = ARM_AM::sub; 883 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 884 DoMerge = true; 885 } 886 if (DoMerge) { 887 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 888 if (NextMBBI == I) { 889 Advance = true; 890 ++I; 891 } 892 MBB.erase(NextMBBI); 893 } 894 } 895 896 if (!DoMerge) 897 return false; 898 899 if (isAM5) { 900 // VLDM[SD}_UPD, VSTM[SD]_UPD 901 // (There are no base-updating versions of VLDR/VSTR instructions, but the 902 // updating load/store-multiple instructions can be used with only one 903 // register.) 904 MachineOperand &MO = MI->getOperand(0); 905 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 906 .addReg(Base, getDefRegState(true)) // WB base register 907 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 908 .addImm(Pred).addReg(PredReg) 909 .addReg(MO.getReg(), (isLd ? 
getDefRegState(true) : 910 getKillRegState(MO.isKill()))); 911 } else if (isLd) { 912 if (isAM2) { 913 // LDR_PRE, LDR_POST 914 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 915 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 916 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 917 .addReg(Base, RegState::Define) 918 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 919 } else { 920 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 921 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 922 .addReg(Base, RegState::Define) 923 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 924 } 925 } else { 926 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 927 // t2LDR_PRE, t2LDR_POST 928 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 929 .addReg(Base, RegState::Define) 930 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 931 } 932 } else { 933 MachineOperand &MO = MI->getOperand(0); 934 // FIXME: post-indexed stores use am2offset_imm, which still encodes 935 // the vestigal zero-reg offset register. When that's fixed, this clause 936 // can be removed entirely. 937 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 938 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 939 // STR_PRE, STR_POST 940 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 941 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 942 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 943 } else { 944 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 945 // t2STR_PRE, t2STR_POST 946 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 947 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 948 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 949 } 950 } 951 MBB.erase(MBBI); 952 953 return true; 954} 955 956/// isMemoryOp - Returns true if instruction is a memory operation that this 957/// pass is capable of operating on. 
958static bool isMemoryOp(const MachineInstr *MI) { 959 // When no memory operands are present, conservatively assume unaligned, 960 // volatile, unfoldable. 961 if (!MI->hasOneMemOperand()) 962 return false; 963 964 const MachineMemOperand *MMO = *MI->memoperands_begin(); 965 966 // Don't touch volatile memory accesses - we may be changing their order. 967 if (MMO->isVolatile()) 968 return false; 969 970 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 971 // not. 972 if (MMO->getAlignment() < 4) 973 return false; 974 975 // str <undef> could probably be eliminated entirely, but for now we just want 976 // to avoid making a mess of it. 977 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 978 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 979 MI->getOperand(0).isUndef()) 980 return false; 981 982 // Likewise don't mess with references to undefined addresses. 983 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 984 MI->getOperand(1).isUndef()) 985 return false; 986 987 int Opcode = MI->getOpcode(); 988 switch (Opcode) { 989 default: break; 990 case ARM::VLDRS: 991 case ARM::VSTRS: 992 return MI->getOperand(1).isReg(); 993 case ARM::VLDRD: 994 case ARM::VSTRD: 995 return MI->getOperand(1).isReg(); 996 case ARM::LDRi12: 997 case ARM::STRi12: 998 case ARM::t2LDRi8: 999 case ARM::t2LDRi12: 1000 case ARM::t2STRi8: 1001 case ARM::t2STRi12: 1002 return MI->getOperand(1).isReg(); 1003 } 1004 return false; 1005} 1006 1007/// AdvanceRS - Advance register scavenger to just before the earliest memory 1008/// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. the op that occurs
  // earliest in the basic block (the queue is ordered by offset, not by
  // position).
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // Step the scavenger to the instruction just before the earliest memory op.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset encoded in MI's
/// addressing-mode immediate operand (third-from-last operand). i12 and
/// Thumb2 forms store the offset directly; AM3/AM5 forms need decoding of
/// the add/sub bit (and AM5 offsets are in words, hence the *4).
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes keep a plain signed byte offset in the immediate field.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Decode addrmode3 (byte granularity) or addrmode5 (word granularity).
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single immediate-offset load (isDef) or store at
/// MBBI, propagating the given dead/kill/undef flags onto the data and base
/// register operands.
/// NOTE(review): the dl, OffKill, OffUndef and isT2 parameters are accepted
/// but not referenced in the body; the builder uses MBBI->getDebugLoc().
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    // Load: data register is a def; it may be dead if never read afterwards.
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: data register is a use; RegDeadKill here means "kill".
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - Expand an LDRD/STRD whose two data registers do not
/// form a legal ascending even/odd pair (or that hits the Cortex-M3 errata
/// 602117 base-in-list case) into either a 2-register LDM/STM (when the
/// registers ascend and the offset is zero) or two single loads/stores.
/// Returns true and advances MBBI past the replacement if a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A legal pair (even register, followed by the next odd one) needs no
    // fixing unless the errata applies.
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the data regs are defs (dead flag); for stores they are uses
    // (kill flag).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 encodings have no separate offset register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd (higher-address) half first so the even load, which
        // overlaps the base, comes last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;      // Current chain of mergeable ops, ordered by offset.
  unsigned CurrBase = 0;  // Base register of the current chain (0 = none).
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;  // Instruction index, used to order MemOps entries.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Insert in offset order; the queue stays sorted by Offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state for the next run of mergeable ops.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register in the list for this to fold.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Retarget the pop: lr becomes pc and the return instruction goes away.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Run the post-RA load/store optimizations over every
/// block of Fn: LDM/STM formation, plus return folding on v5T+ targets.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-function target/analysis handles, set in
    // runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Check whether Op0/Op1 can be combined into a single LDRD/STRD; on
    // success the out-parameters describe the replacement instruction.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Move the loads/stores in Ops (same base register) next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

/// runOnMachineFunction - Cache the target hooks, then reschedule loads and
/// stores in every basic block of Fn.
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Scan the instructions strictly between I and E
/// (the first and last mem op of a candidate group) and decide whether the
/// group in MemOps can be moved together: bail on calls, terminators, side
/// effects, conflicting loads/stores, or any redefinition of Base; otherwise
/// accept unless the estimated register-pressure increase is too large.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any def of the base register breaks the group.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate the combined memref array from the function's allocator and
  // append Op0's then Op1's memory operands into it.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// CanFormLdStDWord - Return true if the loads/stores Op0 and Op1 may be
/// combined into a single LDRD/STRD (or t2LDRDi8/t2STRDi8). Checks subtarget
/// support, opcode compatibility, alignment, and offset-range encodability;
/// fills in the out-parameters describing the replacement on success.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // We need a single, non-volatile memory operand with a known IR value to
  // reason about alignment at all.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM addrmode3 encodes magnitude plus an add/sub flag.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct data registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  // Orders memory ops by decreasing offset; offsets within a group are
  // expected to be distinct (enforced by the duplicate check in
  // RescheduleLoadStoreInstrs).
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// RescheduleOps - Given the loads/stores in Ops that share the base register
/// Base, move runs of consecutive-offset, same-width ops next to each other
/// (loads up to the first op, stores down to the last) so the post-RA pass
/// can merge them; pairs may be combined into LDRD/STRD directly. Returns
/// true if anything was moved or combined.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from lowest offset (back of the reverse-sorted vector) to
    // highest, growing the candidate run while offsets stay consecutive.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Drop the whole candidate run and retry with the remainder.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pairing; just splice the ops next to the insertion point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// RescheduleLoadStoreInstrs - Scan MBB region by region (regions end at
/// calls, terminators, or a repeated base+offset), bucket unpredicated memory
/// ops by base register, and reschedule each bucket via RescheduleOps.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;   // instruction -> position
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      // Only reschedule unpredicated memory ops.
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second op at an already-seen base+offset ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start the next region with fresh buckets.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}