// ARMLoadStoreOptimizer.cpp revision 0e5233a9e5ee9385c6a940e3985194d77bee0bbb
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register-allocation pass that combines adjacent
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state, set up in
    // runOnMachineFunction (defined elsewhere in this file).
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;        // Used to find a scratch reg for new base values.
    bool isThumb2;           // True when emitting t2 (Thumb2) opcodes.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load/store being considered for
    /// merging: its immediate offset from the common base, the transfer
    /// register (and whether this use kills it), its ordinal position in
    /// the block, and an iterator to the instruction itself.  Merged is
    /// set once the entry has been folded into an ldm/stm.
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// getLoadStoreMultipleOpcode - Map a single load/store opcode plus an
/// addressing submode to the corresponding load/store-multiple opcode,
/// bumping the matching statistic.  Returns 0 when no non-writeback
/// multiple exists for that submode (the VFP db cases).
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    // Thumb2 only has ia and db forms.
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}

namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Recover the addressing submode (ia, ib,
/// da, db) encoded in a load/store-multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}

  } // end namespace ARM_AM
} // end namespace llvm

// Opcode classification helpers for the i32 (GPR) loads/stores this pass
// knows how to merge.

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
279bool 280ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 281 MachineBasicBlock::iterator MBBI, 282 int Offset, unsigned Base, bool BaseKill, 283 int Opcode, ARMCC::CondCodes Pred, 284 unsigned PredReg, unsigned Scratch, DebugLoc dl, 285 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 286 // Only a single register to load / store. Don't bother. 287 unsigned NumRegs = Regs.size(); 288 if (NumRegs <= 1) 289 return false; 290 291 ARM_AM::AMSubMode Mode = ARM_AM::ia; 292 // VFP and Thumb2 do not support IB or DA modes. 293 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 294 bool haveIBAndDA = isNotVFP && !isThumb2; 295 if (Offset == 4 && haveIBAndDA) 296 Mode = ARM_AM::ib; 297 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) 298 Mode = ARM_AM::da; 299 else if (Offset == -4 * (int)NumRegs && isNotVFP) 300 // VLDM/VSTM do not support DB mode without also updating the base reg. 301 Mode = ARM_AM::db; 302 else if (Offset != 0) { 303 // Check if this is a supported opcode before we insert instructions to 304 // calculate a new base register. 305 if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; 306 307 // If starting offset isn't zero, insert a MI to materialize a new base. 308 // But only do so if it is cost effective, i.e. merging more than two 309 // loads / stores. 310 if (NumRegs <= 2) 311 return false; 312 313 unsigned NewBase; 314 if (isi32Load(Opcode)) 315 // If it is a load, then just use one of the destination register to 316 // use as the new base. 317 NewBase = Regs[NumRegs-1].first; 318 else { 319 // Use the scratch register to use as a new base. 320 NewBase = Scratch; 321 if (NewBase == 0) 322 return false; 323 } 324 int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; 325 if (Offset < 0) { 326 BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; 327 Offset = - Offset; 328 } 329 int ImmedOffset = isThumb2 330 ? 
ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); 331 if (ImmedOffset == -1) 332 // FIXME: Try t2ADDri12 or t2SUBri12? 333 return false; // Probably not worth it then. 334 335 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 336 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 337 .addImm(Pred).addReg(PredReg).addReg(0); 338 Base = NewBase; 339 BaseKill = true; // New base is always killed right its use. 340 } 341 342 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 343 Opcode == ARM::VLDRD); 344 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 345 if (!Opcode) return false; 346 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 347 .addReg(Base, getKillRegState(BaseKill)) 348 .addImm(Pred).addReg(PredReg); 349 for (unsigned i = 0; i != NumRegs; ++i) 350 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 351 | getKillRegState(Regs[i].second)); 352 353 return true; 354} 355 356// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 357// success. 358void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 359 MemOpQueue &memOps, 360 unsigned memOpsBegin, unsigned memOpsEnd, 361 unsigned insertAfter, int Offset, 362 unsigned Base, bool BaseKill, 363 int Opcode, 364 ARMCC::CondCodes Pred, unsigned PredReg, 365 unsigned Scratch, 366 DebugLoc dl, 367 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 368 // First calculate which of the registers should be killed by the merged 369 // instruction. 
370 const unsigned insertPos = memOps[insertAfter].Position; 371 SmallSet<unsigned, 4> KilledRegs; 372 DenseMap<unsigned, unsigned> Killer; 373 for (unsigned i = 0, e = memOps.size(); i != e; ++i) { 374 if (i == memOpsBegin) { 375 i = memOpsEnd; 376 if (i == e) 377 break; 378 } 379 if (memOps[i].Position < insertPos && memOps[i].isKill) { 380 unsigned Reg = memOps[i].Reg; 381 KilledRegs.insert(Reg); 382 Killer[Reg] = i; 383 } 384 } 385 386 SmallVector<std::pair<unsigned, bool>, 8> Regs; 387 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 388 unsigned Reg = memOps[i].Reg; 389 // If we are inserting the merged operation after an operation that 390 // uses the same register, make sure to transfer any kill flag. 391 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 392 Regs.push_back(std::make_pair(Reg, isKill)); 393 } 394 395 // Try to do the merge. 396 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 397 ++Loc; 398 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 399 Pred, PredReg, Scratch, dl, Regs)) 400 return; 401 402 // Merge succeeded, update records. 403 Merges.push_back(prior(Loc)); 404 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 405 // Remove kill flags from any memops that come before insertPos. 406 if (Regs[i-memOpsBegin].second) { 407 unsigned Reg = Regs[i-memOpsBegin].first; 408 if (KilledRegs.count(Reg)) { 409 unsigned j = Killer[Reg]; 410 int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); 411 assert(Idx >= 0 && "Cannot find killing operand"); 412 memOps[j].MBBI->getOperand(Idx).setIsKill(false); 413 memOps[j].isKill = false; 414 } 415 memOps[i].isKill = true; 416 } 417 MBB.erase(memOps[i].MBBI); 418 // Update this memop to refer to the merged instruction. 419 // We may need to move kill flags again. 
420 memOps[i].Merged = true; 421 memOps[i].MBBI = Merges.back(); 422 memOps[i].Position = insertPos; 423 } 424} 425 426/// MergeLDR_STR - Merge a number of load / store instructions into one or more 427/// load / store multiple instructions. 428void 429ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 430 unsigned Base, int Opcode, unsigned Size, 431 ARMCC::CondCodes Pred, unsigned PredReg, 432 unsigned Scratch, MemOpQueue &MemOps, 433 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 434 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 435 int Offset = MemOps[SIndex].Offset; 436 int SOffset = Offset; 437 unsigned insertAfter = SIndex; 438 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 439 DebugLoc dl = Loc->getDebugLoc(); 440 const MachineOperand &PMO = Loc->getOperand(0); 441 unsigned PReg = PMO.getReg(); 442 unsigned PRegNum = PMO.isUndef() ? UINT_MAX 443 : getARMRegisterNumbering(PReg); 444 unsigned Count = 1; 445 unsigned Limit = ~0U; 446 447 // vldm / vstm limit are 32 for S variants, 16 for D variants. 448 449 switch (Opcode) { 450 default: break; 451 case ARM::VSTRS: 452 Limit = 32; 453 break; 454 case ARM::VSTRD: 455 Limit = 16; 456 break; 457 case ARM::VLDRD: 458 Limit = 16; 459 break; 460 case ARM::VLDRS: 461 Limit = 32; 462 break; 463 } 464 465 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 466 int NewOffset = MemOps[i].Offset; 467 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 468 unsigned Reg = MO.getReg(); 469 unsigned RegNum = MO.isUndef() ? UINT_MAX 470 : getARMRegisterNumbering(Reg); 471 // Register numbers must be in ascending order. For VFP / NEON load and 472 // store multiples, the registers must also be consecutive and within the 473 // limit on the number of registers per instruction. 
474 if (Reg != ARM::SP && 475 NewOffset == Offset + (int)Size && 476 ((isNotVFP && RegNum > PRegNum) || 477 ((Count < Limit) && RegNum == PRegNum+1))) { 478 Offset += Size; 479 PRegNum = RegNum; 480 ++Count; 481 } else { 482 // Can't merge this in. Try merge the earlier ones first. 483 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 484 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 485 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 486 MemOps, Merges); 487 return; 488 } 489 490 if (MemOps[i].Position > MemOps[insertAfter].Position) 491 insertAfter = i; 492 } 493 494 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 495 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 496 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 497 return; 498} 499 500static bool definesCPSR(MachineInstr *MI) { 501 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 502 const MachineOperand &MO = MI->getOperand(i); 503 if (!MO.isReg()) 504 continue; 505 if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) 506 // If the instruction has live CPSR def, then it's not safe to fold it 507 // into load / store. 508 return true; 509 } 510 511 return false; 512} 513 514static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 515 unsigned Bytes, unsigned Limit, 516 ARMCC::CondCodes Pred, unsigned PredReg) { 517 unsigned MyPredReg = 0; 518 if (!MI) 519 return false; 520 521 bool CheckCPSRDef = false; 522 switch (MI->getOpcode()) { 523 default: return false; 524 case ARM::t2SUBri: 525 case ARM::SUBri: 526 CheckCPSRDef = true; 527 // fallthrough 528 case ARM::tSUBspi: 529 break; 530 } 531 532 // Make sure the offset fits in 8 bits. 533 if (Bytes == 0 || (Limit && Bytes >= Limit)) 534 return false; 535 536 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 
4 : 1; // FIXME 537 if (!(MI->getOperand(0).getReg() == Base && 538 MI->getOperand(1).getReg() == Base && 539 (MI->getOperand(2).getImm()*Scale) == Bytes && 540 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 541 MyPredReg == PredReg)) 542 return false; 543 544 return CheckCPSRDef ? !definesCPSR(MI) : true; 545} 546 547static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 548 unsigned Bytes, unsigned Limit, 549 ARMCC::CondCodes Pred, unsigned PredReg) { 550 unsigned MyPredReg = 0; 551 if (!MI) 552 return false; 553 554 bool CheckCPSRDef = false; 555 switch (MI->getOpcode()) { 556 default: return false; 557 case ARM::t2ADDri: 558 case ARM::ADDri: 559 CheckCPSRDef = true; 560 // fallthrough 561 case ARM::tADDspi: 562 break; 563 } 564 565 if (Bytes == 0 || (Limit && Bytes >= Limit)) 566 // Make sure the offset fits in 8 bits. 567 return false; 568 569 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME 570 if (!(MI->getOperand(0).getReg() == Base && 571 MI->getOperand(1).getReg() == Base && 572 (MI->getOperand(2).getImm()*Scale) == Bytes && 573 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 574 MyPredReg == PredReg)) 575 return false; 576 577 return CheckCPSRDef ? 
!definesCPSR(MI) : true; 578} 579 580static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 581 switch (MI->getOpcode()) { 582 default: return 0; 583 case ARM::LDRi12: 584 case ARM::STRi12: 585 case ARM::t2LDRi8: 586 case ARM::t2LDRi12: 587 case ARM::t2STRi8: 588 case ARM::t2STRi12: 589 case ARM::VLDRS: 590 case ARM::VSTRS: 591 return 4; 592 case ARM::VLDRD: 593 case ARM::VSTRD: 594 return 8; 595 case ARM::LDMIA: 596 case ARM::LDMDA: 597 case ARM::LDMDB: 598 case ARM::LDMIB: 599 case ARM::STMIA: 600 case ARM::STMDA: 601 case ARM::STMDB: 602 case ARM::STMIB: 603 case ARM::t2LDMIA: 604 case ARM::t2LDMDB: 605 case ARM::t2STMIA: 606 case ARM::t2STMDB: 607 case ARM::VLDMSIA: 608 case ARM::VSTMSIA: 609 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 610 case ARM::VLDMDIA: 611 case ARM::VSTMDIA: 612 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 613 } 614} 615 616static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 617 ARM_AM::AMSubMode Mode) { 618 switch (Opc) { 619 default: llvm_unreachable("Unhandled opcode!"); 620 case ARM::LDMIA: 621 case ARM::LDMDA: 622 case ARM::LDMDB: 623 case ARM::LDMIB: 624 switch (Mode) { 625 default: llvm_unreachable("Unhandled submode!"); 626 case ARM_AM::ia: return ARM::LDMIA_UPD; 627 case ARM_AM::ib: return ARM::LDMIB_UPD; 628 case ARM_AM::da: return ARM::LDMDA_UPD; 629 case ARM_AM::db: return ARM::LDMDB_UPD; 630 } 631 case ARM::STMIA: 632 case ARM::STMDA: 633 case ARM::STMDB: 634 case ARM::STMIB: 635 switch (Mode) { 636 default: llvm_unreachable("Unhandled submode!"); 637 case ARM_AM::ia: return ARM::STMIA_UPD; 638 case ARM_AM::ib: return ARM::STMIB_UPD; 639 case ARM_AM::da: return ARM::STMDA_UPD; 640 case ARM_AM::db: return ARM::STMDB_UPD; 641 } 642 case ARM::t2LDMIA: 643 case ARM::t2LDMDB: 644 switch (Mode) { 645 default: llvm_unreachable("Unhandled submode!"); 646 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 647 case ARM_AM::db: return ARM::t2LDMDB_UPD; 648 } 649 case 
ARM::t2STMIA: 650 case ARM::t2STMDB: 651 switch (Mode) { 652 default: llvm_unreachable("Unhandled submode!"); 653 case ARM_AM::ia: return ARM::t2STMIA_UPD; 654 case ARM_AM::db: return ARM::t2STMDB_UPD; 655 } 656 case ARM::VLDMSIA: 657 switch (Mode) { 658 default: llvm_unreachable("Unhandled submode!"); 659 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 660 case ARM_AM::db: return ARM::VLDMSDB_UPD; 661 } 662 case ARM::VLDMDIA: 663 switch (Mode) { 664 default: llvm_unreachable("Unhandled submode!"); 665 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 666 case ARM_AM::db: return ARM::VLDMDDB_UPD; 667 } 668 case ARM::VSTMSIA: 669 switch (Mode) { 670 default: llvm_unreachable("Unhandled submode!"); 671 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 672 case ARM_AM::db: return ARM::VSTMSDB_UPD; 673 } 674 case ARM::VSTMDIA: 675 switch (Mode) { 676 default: llvm_unreachable("Unhandled submode!"); 677 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 678 case ARM_AM::db: return ARM::VSTMDDB_UPD; 679 } 680 } 681} 682 683/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 684/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 685/// 686/// stmia rn, <ra, rb, rc> 687/// rn := rn + 4 * 3; 688/// => 689/// stmia rn!, <ra, rb, rc> 690/// 691/// rn := rn - 4 * 3; 692/// ldmia rn, <ra, rb, rc> 693/// => 694/// ldmdb rn!, <ra, rb, rc> 695bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 696 MachineBasicBlock::iterator MBBI, 697 bool &Advance, 698 MachineBasicBlock::iterator &I) { 699 MachineInstr *MI = MBBI; 700 unsigned Base = MI->getOperand(0).getReg(); 701 bool BaseKill = MI->getOperand(0).isKill(); 702 unsigned Bytes = getLSMultipleTransferSize(MI); 703 unsigned PredReg = 0; 704 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 705 int Opcode = MI->getOpcode(); 706 DebugLoc dl = MI->getDebugLoc(); 707 708 // Can't use an updating ld/st if the base register is also a dest 709 // register. e.g. ldmdb r0!, {r0, r1, r2}. 
The behavior is undefined. 710 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 711 if (MI->getOperand(i).getReg() == Base) 712 return false; 713 714 bool DoMerge = false; 715 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 716 717 // Try merging with the previous instruction. 718 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 719 if (MBBI != BeginMBBI) { 720 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 721 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 722 --PrevMBBI; 723 if (Mode == ARM_AM::ia && 724 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 725 Mode = ARM_AM::db; 726 DoMerge = true; 727 } else if (Mode == ARM_AM::ib && 728 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 729 Mode = ARM_AM::da; 730 DoMerge = true; 731 } 732 if (DoMerge) 733 MBB.erase(PrevMBBI); 734 } 735 736 // Try merging with the next instruction. 737 MachineBasicBlock::iterator EndMBBI = MBB.end(); 738 if (!DoMerge && MBBI != EndMBBI) { 739 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 740 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 741 ++NextMBBI; 742 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 743 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 744 DoMerge = true; 745 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 746 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 747 DoMerge = true; 748 } 749 if (DoMerge) { 750 if (NextMBBI == I) { 751 Advance = true; 752 ++I; 753 } 754 MBB.erase(NextMBBI); 755 } 756 } 757 758 if (!DoMerge) 759 return false; 760 761 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 762 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 763 .addReg(Base, getDefRegState(true)) // WB base register 764 .addReg(Base, getKillRegState(BaseKill)) 765 .addImm(Pred).addReg(PredReg); 766 767 // Transfer the rest of operands. 
768 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 769 MIB.addOperand(MI->getOperand(OpNum)); 770 771 // Transfer memoperands. 772 MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 773 774 MBB.erase(MBBI); 775 return true; 776} 777 778static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 779 ARM_AM::AddrOpc Mode) { 780 switch (Opc) { 781 case ARM::LDRi12: 782 return ARM::LDR_PRE_IMM; 783 case ARM::STRi12: 784 return ARM::STR_PRE_IMM; 785 case ARM::VLDRS: 786 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 787 case ARM::VLDRD: 788 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 789 case ARM::VSTRS: 790 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 791 case ARM::VSTRD: 792 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 793 case ARM::t2LDRi8: 794 case ARM::t2LDRi12: 795 return ARM::t2LDR_PRE; 796 case ARM::t2STRi8: 797 case ARM::t2STRi12: 798 return ARM::t2STR_PRE; 799 default: llvm_unreachable("Unhandled opcode!"); 800 } 801} 802 803static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 804 ARM_AM::AddrOpc Mode) { 805 switch (Opc) { 806 case ARM::LDRi12: 807 return ARM::LDR_POST_IMM; 808 case ARM::STRi12: 809 return ARM::STR_POST_IMM; 810 case ARM::VLDRS: 811 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 812 case ARM::VLDRD: 813 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 814 case ARM::VSTRS: 815 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 816 case ARM::VSTRD: 817 return Mode == ARM_AM::add ? 
ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 818 case ARM::t2LDRi8: 819 case ARM::t2LDRi12: 820 return ARM::t2LDR_POST; 821 case ARM::t2STRi8: 822 case ARM::t2STRi12: 823 return ARM::t2STR_POST; 824 default: llvm_unreachable("Unhandled opcode!"); 825 } 826} 827 828/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 829/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 830bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 831 MachineBasicBlock::iterator MBBI, 832 const TargetInstrInfo *TII, 833 bool &Advance, 834 MachineBasicBlock::iterator &I) { 835 MachineInstr *MI = MBBI; 836 unsigned Base = MI->getOperand(1).getReg(); 837 bool BaseKill = MI->getOperand(1).isKill(); 838 unsigned Bytes = getLSMultipleTransferSize(MI); 839 int Opcode = MI->getOpcode(); 840 DebugLoc dl = MI->getDebugLoc(); 841 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 842 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 843 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 844 if (isi32Load(Opcode) || isi32Store(Opcode)) 845 if (MI->getOperand(2).getImm() != 0) 846 return false; 847 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 848 return false; 849 850 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 851 // Can't do the merge if the destination register is the same as the would-be 852 // writeback register. 853 if (isLd && MI->getOperand(0).getReg() == Base) 854 return false; 855 856 unsigned PredReg = 0; 857 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 858 bool DoMerge = false; 859 ARM_AM::AddrOpc AddSub = ARM_AM::add; 860 unsigned NewOpc = 0; 861 // AM2 - 12 bits, thumb2 - 8 bits. 862 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 863 864 // Try merging with the previous instruction. 
865 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 866 if (MBBI != BeginMBBI) { 867 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 868 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 869 --PrevMBBI; 870 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 871 DoMerge = true; 872 AddSub = ARM_AM::sub; 873 } else if (!isAM5 && 874 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 875 DoMerge = true; 876 } 877 if (DoMerge) { 878 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 879 MBB.erase(PrevMBBI); 880 } 881 } 882 883 // Try merging with the next instruction. 884 MachineBasicBlock::iterator EndMBBI = MBB.end(); 885 if (!DoMerge && MBBI != EndMBBI) { 886 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 887 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 888 ++NextMBBI; 889 if (!isAM5 && 890 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 891 DoMerge = true; 892 AddSub = ARM_AM::sub; 893 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 894 DoMerge = true; 895 } 896 if (DoMerge) { 897 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 898 if (NextMBBI == I) { 899 Advance = true; 900 ++I; 901 } 902 MBB.erase(NextMBBI); 903 } 904 } 905 906 if (!DoMerge) 907 return false; 908 909 if (isAM5) { 910 // VLDM[SD}_UPD, VSTM[SD]_UPD 911 // (There are no base-updating versions of VLDR/VSTR instructions, but the 912 // updating load/store-multiple instructions can be used with only one 913 // register.) 914 MachineOperand &MO = MI->getOperand(0); 915 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 916 .addReg(Base, getDefRegState(true)) // WB base register 917 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 918 .addImm(Pred).addReg(PredReg) 919 .addReg(MO.getReg(), (isLd ? 
getDefRegState(true) : 920 getKillRegState(MO.isKill()))); 921 } else if (isLd) { 922 if (isAM2) { 923 // LDR_PRE, LDR_POST 924 if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { 925 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 926 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 927 .addReg(Base, RegState::Define) 928 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 929 } else { 930 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 931 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 932 .addReg(Base, RegState::Define) 933 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 934 } 935 } else { 936 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 937 // t2LDR_PRE, t2LDR_POST 938 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 939 .addReg(Base, RegState::Define) 940 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 941 } 942 } else { 943 MachineOperand &MO = MI->getOperand(0); 944 // FIXME: post-indexed stores use am2offset_imm, which still encodes 945 // the vestigal zero-reg offset register. When that's fixed, this clause 946 // can be removed entirely. 947 if (isAM2 && NewOpc == ARM::STR_POST_IMM) { 948 int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 949 // STR_PRE, STR_POST 950 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 951 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 952 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 953 } else { 954 int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 955 // t2STR_PRE, t2STR_POST 956 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 957 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 958 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 959 } 960 } 961 MBB.erase(MBBI); 962 963 return true; 964} 965 966/// isMemoryOp - Returns true if instruction is a memory operation that this 967/// pass is capable of operating on. 
968static bool isMemoryOp(const MachineInstr *MI) { 969 // When no memory operands are present, conservatively assume unaligned, 970 // volatile, unfoldable. 971 if (!MI->hasOneMemOperand()) 972 return false; 973 974 const MachineMemOperand *MMO = *MI->memoperands_begin(); 975 976 // Don't touch volatile memory accesses - we may be changing their order. 977 if (MMO->isVolatile()) 978 return false; 979 980 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 981 // not. 982 if (MMO->getAlignment() < 4) 983 return false; 984 985 // str <undef> could probably be eliminated entirely, but for now we just want 986 // to avoid making a mess of it. 987 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 988 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 989 MI->getOperand(0).isUndef()) 990 return false; 991 992 // Likewise don't mess with references to undefined addresses. 993 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 994 MI->getOperand(1).isUndef()) 995 return false; 996 997 int Opcode = MI->getOpcode(); 998 switch (Opcode) { 999 default: break; 1000 case ARM::VLDRS: 1001 case ARM::VSTRS: 1002 return MI->getOperand(1).isReg(); 1003 case ARM::VLDRD: 1004 case ARM::VSTRD: 1005 return MI->getOperand(1).isReg(); 1006 case ARM::LDRi12: 1007 case ARM::STRi12: 1008 case ARM::t2LDRi8: 1009 case ARM::t2LDRi12: 1010 case ARM::t2STRi8: 1011 case ARM::t2STRi12: 1012 return MI->getOperand(1).isReg(); 1013 } 1014 return false; 1015} 1016 1017/// AdvanceRS - Advance register scavenger to just before the earliest memory 1018/// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. the earliest op.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset encoded in the
/// addressing-mode immediate of a supported load/store (it is always the
/// third-from-last operand). i12 and t2 immediate forms store the offset
/// directly; AM3 (LDRD/STRD) and AM5 forms encode an add/sub bit, and AM5
/// offsets are in words, hence the *4 scaling.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single load (isDef == true) or store with the given
/// register, base, immediate offset and predicate before MBBI. Helper for
/// FixInvalidRegPairOp when an LDRD/STRD must be split into two accesses.
/// NOTE(review): the OffKill/dl/isT2 parameters are currently unused in the
/// body — kept for interface stability with the call sites.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - Rewrite an LDRD/STRD whose register pair cannot be
/// kept (registers not an even/odd consecutive pair, or the Cortex-M3 errata
/// 602117 case where the base register is also a destination) into either an
/// LDM/STM (ascending regs, zero offset) or two single loads/stores. Returns
/// true if MI was replaced; MBBI is updated to the first replacement
/// instruction so the caller rescans from there.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
    if (!Errata602117 &&
        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
      // Pair is already valid (even register, consecutive odd partner) and
      // not affected by the errata: nothing to fix.
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Load the odd (non-overlapping) half first so the base survives for
        // the second load.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // Current chain being accumulated: all queued ops share this base register,
  // opcode, size and predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Insert in offset order; the queue is kept sorted by offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state for the next run of mergeable ops.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The register list is the trailing operands; the last loaded register
      // must be LR for the fold to apply.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite LR -> PC in place and delete the now-redundant return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Entry point for the post-RA pass: run the
/// load/store-multiple optimization on every block, and on targets with
/// v5t ops also try folding the return into a trailing LDM.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  STI = &TM.getSubtarget<ARMSubtarget>();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations close together to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target/function state, initialized in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps can be
/// rescheduled next to each other: no call, terminator, side-effecting,
/// base-clobbering or conflicting load/store instruction may appear between
/// I and E (exclusive), and the estimated register-pressure increase must be
/// acceptable. MemRegs holds the data registers of the ops being moved.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // A redefinition of the base between the ops makes the move invalid.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}


/// Copy Op0 and Op1 operands into a new array assigned to MI.
static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                   MachineInstr *Op1) {
  assert(MI->memoperands_empty() && "expected a new machineinstr");
  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
    + (Op1->memoperands_end() - Op1->memoperands_begin());

  // Allocate the combined memref array from the function's allocator and
  // copy both operand lists into it back-to-back.
  MachineFunction *MF = MI->getParent()->getParent();
  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
  MachineSDNode::mmo_iterator MemEnd =
    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
  MemEnd =
    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
  MI->setMemRefs(MemBegin, MemEnd);
}

/// CanFormLdStDWord - Decide whether the two i32 loads or stores Op0/Op1 can
/// be combined into one LDRD/STRD (or t2LDRDi8/t2STRDi8). Checks subtarget
/// support, opcode, alignment of the memory operand, that the immediate
/// offset fits the target addressing mode, and that the two data registers
/// differ. On success fills in all by-reference out-parameters.
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    // Non-T2 LDRD/STRD uses addrmode3: sign encoded separately from the
    // magnitude.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

namespace {
  // Orders memory ops by decreasing address offset; equal offsets are only
  // permitted for identical instructions (enforced by the assert).
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// RescheduleOps - Given the loads (or stores) in Ops that share the base
/// register Base, move runs of ops with consecutive offsets next to each
/// other (and, for pairs, try to form an LDRD/STRD) so the post-RA pass can
/// merge them. Returns true if anything was moved or combined.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Scan from the lowest offset (back of the sorted vector), accumulating
    // a run of same-opcode, gap-free ops and tracking the earliest/latest
    // program positions seen.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            concatenateMemOperands(MIB, Op0, Op1);
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pairing; just splice the whole run next to InsertPos.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

/// RescheduleLoadStoreInstrs - Walk the block, bucketing unpredicated memory
/// ops by base register (separately for loads and stores), and invoke
/// RescheduleOps on each bucket. Scanning restarts at calls/terminators and
/// at duplicate base+offset pairs, which act as barriers.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      // A barrier was hit; reset the buckets before scanning the next region.
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}