ARMLoadStoreOptimizer.cpp revision 805543068eb7407d718b0359f54b342d7094d0ea
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-ldst-opt" 16#include "ARM.h" 17#include "ARMBaseInstrInfo.h" 18#include "ARMBaseRegisterInfo.h" 19#include "ARMMachineFunctionInfo.h" 20#include "MCTargetDesc/ARMAddressingModes.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/Function.h" 23#include "llvm/CodeGen/MachineBasicBlock.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstr.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/MachineRegisterInfo.h" 28#include "llvm/CodeGen/RegisterScavenging.h" 29#include "llvm/CodeGen/SelectionDAGNodes.h" 30#include "llvm/Target/TargetData.h" 31#include "llvm/Target/TargetInstrInfo.h" 32#include "llvm/Target/TargetMachine.h" 33#include "llvm/Target/TargetRegisterInfo.h" 34#include "llvm/Support/ErrorHandling.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/ADT/DenseMap.h" 38#include "llvm/ADT/STLExtras.h" 39#include "llvm/ADT/SmallPtrSet.h" 40#include "llvm/ADT/SmallSet.h" 41#include "llvm/ADT/SmallVector.h" 42#include "llvm/ADT/Statistic.h" 43using namespace llvm; 44 45STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 46STATISTIC(NumSTMGened , "Number of stm instructions generated"); 47STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 48STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 49STATISTIC(NumLdStMoved, "Number of load / store 
instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines series of
/// load / store instructions to form ldm / stm instructions.
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state, set up in
    // runOnMachineFunction (defined elsewhere in this file).
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;           // Used to find a scratch reg for merged bases.
    bool isThumb2;              // True when emitting Thumb2 LDM/STM variants.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load/store being considered for
    /// merging into a load/store-multiple.
    struct MemOpQueueEntry {
      int Offset;                         // Immediate offset from the base reg.
      unsigned Reg;                       // Register loaded or stored.
      bool isKill;                        // True if Reg is killed by this op.
      unsigned Position;                  // Ordinal position within the block.
      MachineBasicBlock::iterator MBBI;   // The instruction itself.
      bool Merged;                        // Set once folded into a merge.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// getLoadStoreMultipleOpcode - Map a single-register immediate-offset
/// load/store opcode plus an addressing submode onto the corresponding
/// load/store-multiple opcode.  Returns 0 when no non-writeback multiple
/// exists for the requested submode (the VLDM/VSTM DB forms only exist as
/// _UPD variants).  Also bumps the matching statistic counter, so only call
/// this when a merge is actually being formed or probed.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 only has IA and DB load-multiple forms.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Recover the addressing submode (ia/ib/da/db)
/// encoded in a load/store-multiple opcode, including _UPD and _RET variants.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

// Opcode classification helpers for the integer (i32) load/store forms this
// pass knows how to merge.

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing submode from the starting offset.  Offset is the
  // immediate of the first (lowest-address) op relative to Base.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.  Regs[NumRegs-1] is the highest-numbered one,
      // which the load-multiple will overwrite last.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    // Materialize Base +/- Offset into NewBase with a (predicated) ADD/SUB.
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  // Append the register list; each entry is a def for loads, and carries the
  // accumulated kill state computed by the caller.
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  // Scan the memops OUTSIDE the [memOpsBegin, memOpsEnd) range being merged
  // (the assignment `i = memOpsEnd` skips over the merged range) for ops that
  // kill a register before the insertion point; those kills must migrate to
  // the merged instruction.
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.  The merged instruction goes immediately after the
  // memop at insertAfter.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;                     // Starting offset of this run.
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // Undef operands compare as UINT_MAX so they never constrain ordering.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  // Grow the run [SIndex, i) while offsets stay contiguous and register
  // numbers satisfy the ordering constraints below.
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // Note the tail-recursive restart from index i for the remainder.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

/// definesCPSR - Return true if MI has a live (non-dead) def of CPSR.
static bool definesCPSR(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }

  return false;
}

/// isMatchingDecrement - Return true if MI is a SUB of Base by exactly Bytes
/// (with matching predicate) that can be folded into a base-writeback
/// load/store.  Limit, when nonzero, bounds the acceptable byte amount.
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2SUBri:
  case ARM::SUBri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tSUBspi:
    break;
  }

  // Make sure the offset fits in 8 bits.
  if (Bytes == 0 || (Limit && Bytes >= Limit))
    return false;

  // tSUBspi encodes its immediate in words, not bytes.
  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  // A SUBS that sets flags cannot be folded away.
  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// isMatchingIncrement - Counterpart of isMatchingDecrement for ADDs of Base
/// by exactly Bytes.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;

  bool CheckCPSRDef = false;
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::t2ADDri:
  case ARM::ADDri:
    CheckCPSRDef = true;
    // fallthrough
  case ARM::tADDspi:
    break;
  }

  if (Bytes == 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  if (!(MI->getOperand(0).getReg() == Base &&
        MI->getOperand(1).getReg() == Base &&
        (MI->getOperand(2).getImm()*Scale) == Bytes &&
        getInstrPredicate(MI, MyPredReg) == Pred &&
        MyPredReg == PredReg))
    return false;

  return CheckCPSRDef ? !definesCPSR(MI) : true;
}

/// getLSMultipleTransferSize - Number of bytes transferred by MI; for the
/// multiple forms this is (number of register-list operands) * access size.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// getUpdatingLSMultipleOpcode - Map a non-writeback load/store-multiple
/// opcode plus submode to the corresponding base-updating (_UPD) opcode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}.
  // The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction.  A preceding decrement of the
  // base turns an ia/ib multiple into the db/da writeback form.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.  A following add/sub matching the
  // transfer direction becomes the writeback, keeping the same submode.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we erase the instruction the caller's cursor points at, advance
      // the cursor first so it stays valid.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// getPreIndexedLoadStoreOpcode - Map a plain load/store opcode to its
/// pre-indexed (base written back before access) counterpart.  For VFP there
/// is no pre-indexed VLDR/VSTR, so the one-register _UPD multiple is used.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// getPostIndexedLoadStoreOpcode - Map a plain load/store opcode to its
/// post-indexed (base written back after access) counterpart.
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only zero-offset loads/stores can absorb a base update.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // Keep the caller's cursor valid if we erase the instruction under it.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
      }
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    } else {
      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation that this
/// pass is capable of operating on.
968static bool isMemoryOp(const MachineInstr *MI) { 969 // When no memory operands are present, conservatively assume unaligned, 970 // volatile, unfoldable. 971 if (!MI->hasOneMemOperand()) 972 return false; 973 974 const MachineMemOperand *MMO = *MI->memoperands_begin(); 975 976 // Don't touch volatile memory accesses - we may be changing their order. 977 if (MMO->isVolatile()) 978 return false; 979 980 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 981 // not. 982 if (MMO->getAlignment() < 4) 983 return false; 984 985 // str <undef> could probably be eliminated entirely, but for now we just want 986 // to avoid making a mess of it. 987 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 988 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 989 MI->getOperand(0).isUndef()) 990 return false; 991 992 // Likewise don't mess with references to undefined addresses. 993 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 994 MI->getOperand(1).isUndef()) 995 return false; 996 997 int Opcode = MI->getOpcode(); 998 switch (Opcode) { 999 default: break; 1000 case ARM::VLDRS: 1001 case ARM::VSTRS: 1002 return MI->getOperand(1).isReg(); 1003 case ARM::VLDRD: 1004 case ARM::VSTRD: 1005 return MI->getOperand(1).isReg(); 1006 case ARM::LDRi12: 1007 case ARM::STRi12: 1008 case ARM::t2LDRi8: 1009 case ARM::t2LDRi12: 1010 case ARM::t2STRi8: 1011 case ARM::t2STRi12: 1012 return MI->getOperand(1).isReg(); 1013 } 1014 return false; 1015} 1016 1017/// AdvanceRS - Advance register scavenger to just before the earliest memory 1018/// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. the op that comes
  // first in block order among the ops being merged.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // Step the scavenger to the instruction just before the earliest op;
  // nothing to do if that op is the first instruction of the block.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset carried by the
/// addressing-mode operand of a load/store this pass understands. The
/// offset immediate is always the third-from-last operand.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These forms store the offset as a plain immediate; return it directly.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Otherwise the offset is packed in an addrmode3 (LDRD/STRD) or addrmode5
  // (scaled by 4) encoding: decode the magnitude, then apply the add/sub bit.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single load (isDef) or store (!isDef) of Reg with
/// the given base/offset and kill/dead/undef flags, inserted before MBBI.
/// NOTE(review): the dl, OffKill and isT2 parameters are currently unused in
/// this body; the OffUndef flag is only consumed by callers' bookkeeping.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    // Load: Reg is a def (possibly dead).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: Reg is a use (possibly killed / undef).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - Expand an LDRD/STRD whose register pair cannot stay
/// as-is (not an even/odd pair with ascending Dwarf numbers, or subject to
/// the Cortex-M3 errata) into either an LDM/STM (offset 0, ascending regs)
/// or two single loads/stores. Returns true and repositions MBBI on the
/// first replacement instruction when a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    // A well-formed even/odd pair needs no fixing (unless the errata forces
    // one).
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the destination regs carry dead markers; for stores the
    // source regs carry kill markers.
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // t2 forms have no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (higher-address) half first so the even half, which
        // overlaps the base, is the last to read it.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original paired op and leave MBBI on the first replacement.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // Current chain state: base register, opcode, per-op size and predicate
  // shared by all queued ops. CurrBase == 0 means "no chain in progress".
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Expanding an invalid LDRD/STRD repositions MBBI; re-examine it.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue; keep the queue sorted by offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Debug values don't count toward Position.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state before scanning for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The last register in the list must be LR for the fold to apply.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the pop to load into PC and drop the now-redundant return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Run the post-RA load/store optimizations on every
/// block of the function. MergeReturnIntoLDM needs "ldm ..., {pc}" semantics
/// and is therefore gated on ARMv5T and above.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;

  // Kill flags aren't updated accurately by this pass.
  Fn.getRegInfo().invalidateLiveness();

  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target/function state, set up in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether Op0/Op1 can be combined into a single
    // LDRD/STRD; on success fills in the new opcode and operand details.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move same-base loads/stores in Ops next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

/// runOnMachineFunction - Cache target info, then reschedule loads/stores in
/// every block of the function.
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps can be
/// moved together past the instructions in (I, E) without changing behavior,
/// and whether doing so keeps the register-pressure increase acceptable.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    // Ops in MemOps are the ones being moved; they don't block the move.
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any redefinition of the base register between the ops kills the move.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate one array big enough for both operand lists, copy Op0's then
  // Op1's memory references into it, and attach it to MI.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// CanFormLdStDWord - Return true if Op0/Op1 can be merged into an
/// LDRD/STRD, filling in the out-parameters (opcode, registers, encoded
/// offset, predicate) that the caller needs to build the new instruction.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    // t2LDRDi8/t2STRDi8 take a plain signed immediate.
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // ARM-mode LDRD/STRD use an addrmode3 encoding with a separate sign bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // An LDRD/STRD needs two distinct registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  // OffsetCompare - Orders memory ops by decreasing address offset, so the
  // sorted vector can be consumed from the back in ascending-offset order.
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      // Duplicate offsets were filtered out when Ops was built.
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// RescheduleOps - Given the loads (or stores) in Ops that share base
/// register Base, move runs of them with consecutive offsets next to each
/// other (and, for pairs, try to form an LDRD/STRD) when it is safe and
/// does not raise register pressure too much. Returns true on any change.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from the back, i.e. in ascending-offset order, collecting the
    // longest run of same-sized ops at consecutive offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pair formed: just splice each op to the common insert point.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// RescheduleLoadStoreInstrs - Scan the block in regions bounded by calls and
/// terminators, bucket unpredicated loads and stores by base register, and
/// hand each bucket with more than one entry to RescheduleOps.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      // Number instructions (except debug values) so RescheduleOps can
      // reason about relative positions.
      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start a fresh region after a barrier.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}