// ARMLoadStoreOptimizer.cpp, revision 2567eec4233d58a2a0cbdcafca9420452689b395
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-ldst-opt" 16#include "ARM.h" 17#include "ARMAddressingModes.h" 18#include "ARMBaseInstrInfo.h" 19#include "ARMMachineFunctionInfo.h" 20#include "ARMRegisterInfo.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/Function.h" 23#include "llvm/CodeGen/MachineBasicBlock.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstr.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/MachineRegisterInfo.h" 28#include "llvm/CodeGen/RegisterScavenging.h" 29#include "llvm/Target/TargetData.h" 30#include "llvm/Target/TargetInstrInfo.h" 31#include "llvm/Target/TargetMachine.h" 32#include "llvm/Target/TargetRegisterInfo.h" 33#include "llvm/Support/ErrorHandling.h" 34#include "llvm/ADT/DenseMap.h" 35#include "llvm/ADT/STLExtras.h" 36#include "llvm/ADT/SmallPtrSet.h" 37#include "llvm/ADT/SmallSet.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/ADT/Statistic.h" 40using namespace llvm; 41 42STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 43STATISTIC(NumSTMGened , "Number of stm instructions generated"); 44STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 45STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 46STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 47STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 48STATISTIC(NumSTRDFormed,"Number of strd 
created before allocation"); 49STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 50STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 51STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 52STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 53 54/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 55/// load / store instructions to form ldm / stm instructions. 56 57namespace { 58 struct ARMLoadStoreOpt : public MachineFunctionPass { 59 static char ID; 60 ARMLoadStoreOpt() : MachineFunctionPass(ID) {} 61 62 const TargetInstrInfo *TII; 63 const TargetRegisterInfo *TRI; 64 ARMFunctionInfo *AFI; 65 RegScavenger *RS; 66 bool isThumb2; 67 68 virtual bool runOnMachineFunction(MachineFunction &Fn); 69 70 virtual const char *getPassName() const { 71 return "ARM load / store optimization pass"; 72 } 73 74 private: 75 struct MemOpQueueEntry { 76 int Offset; 77 unsigned Reg; 78 bool isKill; 79 unsigned Position; 80 MachineBasicBlock::iterator MBBI; 81 bool Merged; 82 MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, 83 MachineBasicBlock::iterator i) 84 : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} 85 }; 86 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 87 typedef MemOpQueue::iterator MemOpQueueIter; 88 89 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 90 int Offset, unsigned Base, bool BaseKill, int Opcode, 91 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 92 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); 93 void MergeOpsUpdate(MachineBasicBlock &MBB, 94 MemOpQueue &MemOps, 95 unsigned memOpsBegin, 96 unsigned memOpsEnd, 97 unsigned insertAfter, 98 int Offset, 99 unsigned Base, 100 bool BaseKill, 101 int Opcode, 102 ARMCC::CondCodes Pred, 103 unsigned PredReg, 104 unsigned Scratch, 105 DebugLoc dl, 106 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 107 void 
MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 108 int Opcode, unsigned Size, 109 ARMCC::CondCodes Pred, unsigned PredReg, 110 unsigned Scratch, MemOpQueue &MemOps, 111 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 112 113 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 114 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 115 MachineBasicBlock::iterator &MBBI); 116 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 117 MachineBasicBlock::iterator MBBI, 118 const TargetInstrInfo *TII, 119 bool &Advance, 120 MachineBasicBlock::iterator &I); 121 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 122 MachineBasicBlock::iterator MBBI, 123 bool &Advance, 124 MachineBasicBlock::iterator &I); 125 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 126 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 127 }; 128 char ARMLoadStoreOpt::ID = 0; 129} 130 131static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { 132 switch (Opcode) { 133 default: llvm_unreachable("Unhandled opcode!"); 134 case ARM::LDRi12: 135 ++NumLDMGened; 136 switch (Mode) { 137 default: llvm_unreachable("Unhandled submode!"); 138 case ARM_AM::ia: return ARM::LDMIA; 139 case ARM_AM::da: return ARM::LDMDA; 140 case ARM_AM::db: return ARM::LDMDB; 141 case ARM_AM::ib: return ARM::LDMIB; 142 } 143 break; 144 case ARM::STRi12: 145 ++NumSTMGened; 146 switch (Mode) { 147 default: llvm_unreachable("Unhandled submode!"); 148 case ARM_AM::ia: return ARM::STMIA; 149 case ARM_AM::da: return ARM::STMDA; 150 case ARM_AM::db: return ARM::STMDB; 151 case ARM_AM::ib: return ARM::STMIB; 152 } 153 break; 154 case ARM::t2LDRi8: 155 case ARM::t2LDRi12: 156 ++NumLDMGened; 157 switch (Mode) { 158 default: llvm_unreachable("Unhandled submode!"); 159 case ARM_AM::ia: return ARM::t2LDMIA; 160 case ARM_AM::db: return ARM::t2LDMDB; 161 } 162 break; 163 case ARM::t2STRi8: 164 case ARM::t2STRi12: 165 ++NumSTMGened; 166 switch (Mode) { 167 default: 
llvm_unreachable("Unhandled submode!"); 168 case ARM_AM::ia: return ARM::t2STMIA; 169 case ARM_AM::db: return ARM::t2STMDB; 170 } 171 break; 172 case ARM::VLDRS: 173 ++NumVLDMGened; 174 switch (Mode) { 175 default: llvm_unreachable("Unhandled submode!"); 176 case ARM_AM::ia: return ARM::VLDMSIA; 177 case ARM_AM::db: return ARM::VLDMSDB; 178 } 179 break; 180 case ARM::VSTRS: 181 ++NumVSTMGened; 182 switch (Mode) { 183 default: llvm_unreachable("Unhandled submode!"); 184 case ARM_AM::ia: return ARM::VSTMSIA; 185 case ARM_AM::db: return ARM::VSTMSDB; 186 } 187 break; 188 case ARM::VLDRD: 189 ++NumVLDMGened; 190 switch (Mode) { 191 default: llvm_unreachable("Unhandled submode!"); 192 case ARM_AM::ia: return ARM::VLDMDIA; 193 case ARM_AM::db: return ARM::VLDMDDB; 194 } 195 break; 196 case ARM::VSTRD: 197 ++NumVSTMGened; 198 switch (Mode) { 199 default: llvm_unreachable("Unhandled submode!"); 200 case ARM_AM::ia: return ARM::VSTMDIA; 201 case ARM_AM::db: return ARM::VSTMDDB; 202 } 203 break; 204 } 205 206 return 0; 207} 208 209namespace llvm { 210 namespace ARM_AM { 211 212AMSubMode getLoadStoreMultipleSubMode(int Opcode) { 213 switch (Opcode) { 214 default: llvm_unreachable("Unhandled opcode!"); 215 case ARM::LDMIA: 216 case ARM::STMIA: 217 case ARM::t2LDMIA: 218 case ARM::t2STMIA: 219 case ARM::VLDMSIA: 220 case ARM::VSTMSIA: 221 case ARM::VLDMDIA: 222 case ARM::VSTMDIA: 223 return ARM_AM::ia; 224 225 case ARM::LDMDA: 226 case ARM::STMDA: 227 return ARM_AM::da; 228 229 case ARM::LDMDB: 230 case ARM::STMDB: 231 case ARM::t2LDMDB: 232 case ARM::t2STMDB: 233 case ARM::VLDMSDB: 234 case ARM::VSTMSDB: 235 case ARM::VLDMDDB: 236 case ARM::VSTMDDB: 237 return ARM_AM::db; 238 239 case ARM::LDMIB: 240 case ARM::STMIB: 241 return ARM_AM::ib; 242 } 243 244 return ARM_AM::bad_am_submode; 245} 246 247 } // end namespace ARM_AM 248} // end namespace llvm 249 250static bool isT2i32Load(unsigned Opc) { 251 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 252} 253 254static bool 
isi32Load(unsigned Opc) { 255 return Opc == ARM::LDRi12 || isT2i32Load(Opc); 256} 257 258static bool isT2i32Store(unsigned Opc) { 259 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 260} 261 262static bool isi32Store(unsigned Opc) { 263 return Opc == ARM::STRi12 || isT2i32Store(Opc); 264} 265 266/// MergeOps - Create and insert a LDM or STM with Base as base register and 267/// registers in Regs as the register operands that would be loaded / stored. 268/// It returns true if the transformation is done. 269bool 270ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 271 MachineBasicBlock::iterator MBBI, 272 int Offset, unsigned Base, bool BaseKill, 273 int Opcode, ARMCC::CondCodes Pred, 274 unsigned PredReg, unsigned Scratch, DebugLoc dl, 275 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 276 // Only a single register to load / store. Don't bother. 277 unsigned NumRegs = Regs.size(); 278 if (NumRegs <= 1) 279 return false; 280 281 ARM_AM::AMSubMode Mode = ARM_AM::ia; 282 // VFP and Thumb2 do not support IB or DA modes. 283 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 284 bool haveIBAndDA = isNotVFP && !isThumb2; 285 if (Offset == 4 && haveIBAndDA) 286 Mode = ARM_AM::ib; 287 else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) 288 Mode = ARM_AM::da; 289 else if (Offset == -4 * (int)NumRegs && isNotVFP) 290 // VLDM/VSTM do not support DB mode without also updating the base reg. 291 Mode = ARM_AM::db; 292 else if (Offset != 0) { 293 // If starting offset isn't zero, insert a MI to materialize a new base. 294 // But only do so if it is cost effective, i.e. merging more than two 295 // loads / stores. 296 if (NumRegs <= 2) 297 return false; 298 299 unsigned NewBase; 300 if (isi32Load(Opcode)) 301 // If it is a load, then just use one of the destination register to 302 // use as the new base. 303 NewBase = Regs[NumRegs-1].first; 304 else { 305 // Use the scratch register to use as a new base. 
306 NewBase = Scratch; 307 if (NewBase == 0) 308 return false; 309 } 310 int BaseOpc = !isThumb2 311 ? ARM::ADDri 312 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); 313 if (Offset < 0) { 314 BaseOpc = !isThumb2 315 ? ARM::SUBri 316 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); 317 Offset = - Offset; 318 } 319 int ImmedOffset = isThumb2 320 ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); 321 if (ImmedOffset == -1) 322 // FIXME: Try t2ADDri12 or t2SUBri12? 323 return false; // Probably not worth it then. 324 325 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 326 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 327 .addImm(Pred).addReg(PredReg).addReg(0); 328 Base = NewBase; 329 BaseKill = true; // New base is always killed right its use. 330 } 331 332 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 333 Opcode == ARM::VLDRD); 334 Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); 335 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 336 .addReg(Base, getKillRegState(BaseKill)) 337 .addImm(Pred).addReg(PredReg); 338 for (unsigned i = 0; i != NumRegs; ++i) 339 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 340 | getKillRegState(Regs[i].second)); 341 342 return true; 343} 344 345// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 346// success. 347void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, 348 MemOpQueue &memOps, 349 unsigned memOpsBegin, unsigned memOpsEnd, 350 unsigned insertAfter, int Offset, 351 unsigned Base, bool BaseKill, 352 int Opcode, 353 ARMCC::CondCodes Pred, unsigned PredReg, 354 unsigned Scratch, 355 DebugLoc dl, 356 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 357 // First calculate which of the registers should be killed by the merged 358 // instruction. 
359 const unsigned insertPos = memOps[insertAfter].Position; 360 361 SmallSet<unsigned, 4> UnavailRegs; 362 SmallSet<unsigned, 4> KilledRegs; 363 DenseMap<unsigned, unsigned> Killer; 364 for (unsigned i = 0; i < memOpsBegin; ++i) { 365 if (memOps[i].Position < insertPos && memOps[i].isKill) { 366 unsigned Reg = memOps[i].Reg; 367 if (memOps[i].Merged) 368 UnavailRegs.insert(Reg); 369 else { 370 KilledRegs.insert(Reg); 371 Killer[Reg] = i; 372 } 373 } 374 } 375 for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { 376 if (memOps[i].Position < insertPos && memOps[i].isKill) { 377 unsigned Reg = memOps[i].Reg; 378 KilledRegs.insert(Reg); 379 Killer[Reg] = i; 380 } 381 } 382 383 SmallVector<std::pair<unsigned, bool>, 8> Regs; 384 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 385 unsigned Reg = memOps[i].Reg; 386 if (UnavailRegs.count(Reg)) 387 // Register is killed before and it's not easy / possible to update the 388 // kill marker on already merged instructions. Abort. 389 return; 390 391 // If we are inserting the merged operation after an unmerged operation that 392 // uses the same register, make sure to transfer any kill flag. 393 bool isKill = memOps[i].isKill || KilledRegs.count(Reg); 394 Regs.push_back(std::make_pair(Reg, isKill)); 395 } 396 397 // Try to do the merge. 398 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 399 ++Loc; 400 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 401 Pred, PredReg, Scratch, dl, Regs)) 402 return; 403 404 // Merge succeeded, update records. 405 Merges.push_back(prior(Loc)); 406 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 407 // Remove kill flags from any unmerged memops that come before insertPos. 
408 if (Regs[i-memOpsBegin].second) { 409 unsigned Reg = Regs[i-memOpsBegin].first; 410 if (KilledRegs.count(Reg)) { 411 unsigned j = Killer[Reg]; 412 memOps[j].MBBI->getOperand(0).setIsKill(false); 413 memOps[j].isKill = false; 414 } 415 } 416 MBB.erase(memOps[i].MBBI); 417 memOps[i].Merged = true; 418 } 419} 420 421/// MergeLDR_STR - Merge a number of load / store instructions into one or more 422/// load / store multiple instructions. 423void 424ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 425 unsigned Base, int Opcode, unsigned Size, 426 ARMCC::CondCodes Pred, unsigned PredReg, 427 unsigned Scratch, MemOpQueue &MemOps, 428 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 429 bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); 430 int Offset = MemOps[SIndex].Offset; 431 int SOffset = Offset; 432 unsigned insertAfter = SIndex; 433 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 434 DebugLoc dl = Loc->getDebugLoc(); 435 const MachineOperand &PMO = Loc->getOperand(0); 436 unsigned PReg = PMO.getReg(); 437 unsigned PRegNum = PMO.isUndef() ? UINT_MAX 438 : getARMRegisterNumbering(PReg); 439 unsigned Count = 1; 440 441 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 442 int NewOffset = MemOps[i].Offset; 443 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 444 unsigned Reg = MO.getReg(); 445 unsigned RegNum = MO.isUndef() ? UINT_MAX 446 : getARMRegisterNumbering(Reg); 447 // Register numbers must be in ascending order. For VFP, the registers 448 // must also be consecutive and there is a limit of 16 double-word 449 // registers per instruction. 450 if (Reg != ARM::SP && 451 NewOffset == Offset + (int)Size && 452 ((isNotVFP && RegNum > PRegNum) 453 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { 454 Offset += Size; 455 PRegNum = RegNum; 456 ++Count; 457 } else { 458 // Can't merge this in. Try merge the earlier ones first. 
459 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 460 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 461 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 462 MemOps, Merges); 463 return; 464 } 465 466 if (MemOps[i].Position > MemOps[insertAfter].Position) 467 insertAfter = i; 468 } 469 470 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 471 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 472 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 473 return; 474} 475 476static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 477 unsigned Bytes, unsigned Limit, 478 ARMCC::CondCodes Pred, unsigned PredReg){ 479 unsigned MyPredReg = 0; 480 if (!MI) 481 return false; 482 if (MI->getOpcode() != ARM::t2SUBri && 483 MI->getOpcode() != ARM::t2SUBrSPi && 484 MI->getOpcode() != ARM::t2SUBrSPi12 && 485 MI->getOpcode() != ARM::tSUBspi && 486 MI->getOpcode() != ARM::SUBri) 487 return false; 488 489 // Make sure the offset fits in 8 bits. 490 if (Bytes == 0 || (Limit && Bytes >= Limit)) 491 return false; 492 493 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME 494 return (MI->getOperand(0).getReg() == Base && 495 MI->getOperand(1).getReg() == Base && 496 (MI->getOperand(2).getImm()*Scale) == Bytes && 497 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 498 MyPredReg == PredReg); 499} 500 501static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 502 unsigned Bytes, unsigned Limit, 503 ARMCC::CondCodes Pred, unsigned PredReg){ 504 unsigned MyPredReg = 0; 505 if (!MI) 506 return false; 507 if (MI->getOpcode() != ARM::t2ADDri && 508 MI->getOpcode() != ARM::t2ADDrSPi && 509 MI->getOpcode() != ARM::t2ADDrSPi12 && 510 MI->getOpcode() != ARM::tADDspi && 511 MI->getOpcode() != ARM::ADDri) 512 return false; 513 514 if (Bytes == 0 || (Limit && Bytes >= Limit)) 515 // Make sure the offset fits in 8 bits. 
516 return false; 517 518 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME 519 return (MI->getOperand(0).getReg() == Base && 520 MI->getOperand(1).getReg() == Base && 521 (MI->getOperand(2).getImm()*Scale) == Bytes && 522 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 523 MyPredReg == PredReg); 524} 525 526static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 527 switch (MI->getOpcode()) { 528 default: return 0; 529 case ARM::LDRi12: 530 case ARM::STRi12: 531 case ARM::t2LDRi8: 532 case ARM::t2LDRi12: 533 case ARM::t2STRi8: 534 case ARM::t2STRi12: 535 case ARM::VLDRS: 536 case ARM::VSTRS: 537 return 4; 538 case ARM::VLDRD: 539 case ARM::VSTRD: 540 return 8; 541 case ARM::LDMIA: 542 case ARM::LDMDA: 543 case ARM::LDMDB: 544 case ARM::LDMIB: 545 case ARM::STMIA: 546 case ARM::STMDA: 547 case ARM::STMDB: 548 case ARM::STMIB: 549 case ARM::t2LDMIA: 550 case ARM::t2LDMDB: 551 case ARM::t2STMIA: 552 case ARM::t2STMDB: 553 case ARM::VLDMSIA: 554 case ARM::VLDMSDB: 555 case ARM::VSTMSIA: 556 case ARM::VSTMSDB: 557 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; 558 case ARM::VLDMDIA: 559 case ARM::VLDMDDB: 560 case ARM::VSTMDIA: 561 case ARM::VSTMDDB: 562 return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; 563 } 564} 565 566static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, 567 ARM_AM::AMSubMode Mode) { 568 switch (Opc) { 569 default: llvm_unreachable("Unhandled opcode!"); 570 case ARM::LDMIA: 571 case ARM::LDMDA: 572 case ARM::LDMDB: 573 case ARM::LDMIB: 574 switch (Mode) { 575 default: llvm_unreachable("Unhandled submode!"); 576 case ARM_AM::ia: return ARM::LDMIA_UPD; 577 case ARM_AM::ib: return ARM::LDMIB_UPD; 578 case ARM_AM::da: return ARM::LDMDA_UPD; 579 case ARM_AM::db: return ARM::LDMDB_UPD; 580 } 581 break; 582 case ARM::STMIA: 583 case ARM::STMDA: 584 case ARM::STMDB: 585 case ARM::STMIB: 586 switch (Mode) { 587 default: llvm_unreachable("Unhandled submode!"); 588 case 
ARM_AM::ia: return ARM::STMIA_UPD; 589 case ARM_AM::ib: return ARM::STMIB_UPD; 590 case ARM_AM::da: return ARM::STMDA_UPD; 591 case ARM_AM::db: return ARM::STMDB_UPD; 592 } 593 break; 594 case ARM::t2LDMIA: 595 case ARM::t2LDMDB: 596 switch (Mode) { 597 default: llvm_unreachable("Unhandled submode!"); 598 case ARM_AM::ia: return ARM::t2LDMIA_UPD; 599 case ARM_AM::db: return ARM::t2LDMDB_UPD; 600 } 601 break; 602 case ARM::t2STMIA: 603 case ARM::t2STMDB: 604 switch (Mode) { 605 default: llvm_unreachable("Unhandled submode!"); 606 case ARM_AM::ia: return ARM::t2STMIA_UPD; 607 case ARM_AM::db: return ARM::t2STMDB_UPD; 608 } 609 break; 610 case ARM::VLDMSIA: 611 case ARM::VLDMSDB: 612 switch (Mode) { 613 default: llvm_unreachable("Unhandled submode!"); 614 case ARM_AM::ia: return ARM::VLDMSIA_UPD; 615 case ARM_AM::db: return ARM::VLDMSDB_UPD; 616 } 617 break; 618 case ARM::VLDMDIA: 619 case ARM::VLDMDDB: 620 switch (Mode) { 621 default: llvm_unreachable("Unhandled submode!"); 622 case ARM_AM::ia: return ARM::VLDMDIA_UPD; 623 case ARM_AM::db: return ARM::VLDMDDB_UPD; 624 } 625 break; 626 case ARM::VSTMSIA: 627 case ARM::VSTMSDB: 628 switch (Mode) { 629 default: llvm_unreachable("Unhandled submode!"); 630 case ARM_AM::ia: return ARM::VSTMSIA_UPD; 631 case ARM_AM::db: return ARM::VSTMSDB_UPD; 632 } 633 break; 634 case ARM::VSTMDIA: 635 case ARM::VSTMDDB: 636 switch (Mode) { 637 default: llvm_unreachable("Unhandled submode!"); 638 case ARM_AM::ia: return ARM::VSTMDIA_UPD; 639 case ARM_AM::db: return ARM::VSTMDDB_UPD; 640 } 641 break; 642 } 643 644 return 0; 645} 646 647/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 648/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 649/// 650/// stmia rn, <ra, rb, rc> 651/// rn := rn + 4 * 3; 652/// => 653/// stmia rn!, <ra, rb, rc> 654/// 655/// rn := rn - 4 * 3; 656/// ldmia rn, <ra, rb, rc> 657/// => 658/// ldmdb rn!, <ra, rb, rc> 659bool 
ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 660 MachineBasicBlock::iterator MBBI, 661 bool &Advance, 662 MachineBasicBlock::iterator &I) { 663 MachineInstr *MI = MBBI; 664 unsigned Base = MI->getOperand(0).getReg(); 665 bool BaseKill = MI->getOperand(0).isKill(); 666 unsigned Bytes = getLSMultipleTransferSize(MI); 667 unsigned PredReg = 0; 668 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 669 int Opcode = MI->getOpcode(); 670 DebugLoc dl = MI->getDebugLoc(); 671 672 // Can't use an updating ld/st if the base register is also a dest 673 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 674 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) 675 if (MI->getOperand(i).getReg() == Base) 676 return false; 677 678 bool DoMerge = false; 679 ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode); 680 681 // Try merging with the previous instruction. 682 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 683 if (MBBI != BeginMBBI) { 684 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 685 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 686 --PrevMBBI; 687 if (Mode == ARM_AM::ia && 688 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 689 Mode = ARM_AM::db; 690 DoMerge = true; 691 } else if (Mode == ARM_AM::ib && 692 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 693 Mode = ARM_AM::da; 694 DoMerge = true; 695 } 696 if (DoMerge) 697 MBB.erase(PrevMBBI); 698 } 699 700 // Try merging with the next instruction. 
701 MachineBasicBlock::iterator EndMBBI = MBB.end(); 702 if (!DoMerge && MBBI != EndMBBI) { 703 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 704 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 705 ++NextMBBI; 706 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 707 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 708 DoMerge = true; 709 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 710 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 711 DoMerge = true; 712 } 713 if (DoMerge) { 714 if (NextMBBI == I) { 715 Advance = true; 716 ++I; 717 } 718 MBB.erase(NextMBBI); 719 } 720 } 721 722 if (!DoMerge) 723 return false; 724 725 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); 726 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 727 .addReg(Base, getDefRegState(true)) // WB base register 728 .addReg(Base, getKillRegState(BaseKill)) 729 .addImm(Pred).addReg(PredReg); 730 731 // Transfer the rest of operands. 732 for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum) 733 MIB.addOperand(MI->getOperand(OpNum)); 734 735 // Transfer memoperands. 736 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 737 738 MBB.erase(MBBI); 739 return true; 740} 741 742static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc, 743 ARM_AM::AddrOpc Mode) { 744 switch (Opc) { 745 case ARM::LDRi12: 746 return ARM::LDR_PRE; 747 case ARM::STRi12: 748 return ARM::STR_PRE; 749 case ARM::VLDRS: 750 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 751 case ARM::VLDRD: 752 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 753 case ARM::VSTRS: 754 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 755 case ARM::VSTRD: 756 return Mode == ARM_AM::add ? 
ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 757 case ARM::t2LDRi8: 758 case ARM::t2LDRi12: 759 return ARM::t2LDR_PRE; 760 case ARM::t2STRi8: 761 case ARM::t2STRi12: 762 return ARM::t2STR_PRE; 763 default: llvm_unreachable("Unhandled opcode!"); 764 } 765 return 0; 766} 767 768static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, 769 ARM_AM::AddrOpc Mode) { 770 switch (Opc) { 771 case ARM::LDRi12: 772 return ARM::LDR_POST; 773 case ARM::STRi12: 774 return ARM::STR_POST; 775 case ARM::VLDRS: 776 return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD; 777 case ARM::VLDRD: 778 return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD; 779 case ARM::VSTRS: 780 return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD; 781 case ARM::VSTRD: 782 return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD; 783 case ARM::t2LDRi8: 784 case ARM::t2LDRi12: 785 return ARM::t2LDR_POST; 786 case ARM::t2STRi8: 787 case ARM::t2STRi12: 788 return ARM::t2STR_POST; 789 default: llvm_unreachable("Unhandled opcode!"); 790 } 791 return 0; 792} 793 794/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 795/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 796bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 797 MachineBasicBlock::iterator MBBI, 798 const TargetInstrInfo *TII, 799 bool &Advance, 800 MachineBasicBlock::iterator &I) { 801 MachineInstr *MI = MBBI; 802 unsigned Base = MI->getOperand(1).getReg(); 803 bool BaseKill = MI->getOperand(1).isKill(); 804 unsigned Bytes = getLSMultipleTransferSize(MI); 805 int Opcode = MI->getOpcode(); 806 DebugLoc dl = MI->getDebugLoc(); 807 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 808 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 809 bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); 810 if (isi32Load(Opcode) || isi32Store(Opcode)) 811 if (MI->getOperand(2).getImm() != 0) 812 return false; 813 if (isAM5 && 
ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 814 return false; 815 816 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 817 // Can't do the merge if the destination register is the same as the would-be 818 // writeback register. 819 if (isLd && MI->getOperand(0).getReg() == Base) 820 return false; 821 822 unsigned PredReg = 0; 823 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 824 bool DoMerge = false; 825 ARM_AM::AddrOpc AddSub = ARM_AM::add; 826 unsigned NewOpc = 0; 827 // AM2 - 12 bits, thumb2 - 8 bits. 828 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 829 830 // Try merging with the previous instruction. 831 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 832 if (MBBI != BeginMBBI) { 833 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 834 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 835 --PrevMBBI; 836 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 837 DoMerge = true; 838 AddSub = ARM_AM::sub; 839 } else if (!isAM5 && 840 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 841 DoMerge = true; 842 } 843 if (DoMerge) { 844 NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); 845 MBB.erase(PrevMBBI); 846 } 847 } 848 849 // Try merging with the next instruction. 
850 MachineBasicBlock::iterator EndMBBI = MBB.end(); 851 if (!DoMerge && MBBI != EndMBBI) { 852 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 853 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 854 ++NextMBBI; 855 if (!isAM5 && 856 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 857 DoMerge = true; 858 AddSub = ARM_AM::sub; 859 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 860 DoMerge = true; 861 } 862 if (DoMerge) { 863 NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); 864 if (NextMBBI == I) { 865 Advance = true; 866 ++I; 867 } 868 MBB.erase(NextMBBI); 869 } 870 } 871 872 if (!DoMerge) 873 return false; 874 875 unsigned Offset = 0; 876 if (isAM2) 877 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 878 else if (!isAM5) 879 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 880 881 if (isAM5) { 882 // VLDM[SD}_UPD, VSTM[SD]_UPD 883 // (There are no base-updating versions of VLDR/VSTR instructions, but the 884 // updating load/store-multiple instructions can be used with only one 885 // register.) 886 MachineOperand &MO = MI->getOperand(0); 887 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 888 .addReg(Base, getDefRegState(true)) // WB base register 889 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 890 .addImm(Pred).addReg(PredReg) 891 .addReg(MO.getReg(), (isLd ? 
getDefRegState(true) :
                      getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  // The base update has been folded into the new pre/post-indexed
  // instruction; the original load/store is now dead.
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  // When no memory operands are present (or more than one is), conservatively
  // assume unaligned, volatile, unfoldable.
  if (!MI->hasOneMemOperand())
    return false;

  const MachineMemOperand *MMO = *MI->memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO->isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO->getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just want
  // to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  // Only the immediate-offset forms are candidates, and only when the base
  // (operand 1) is actually a register (e.g. not a frame index).
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // MemOps is ordered by offset, not by position in the block, so scan for the
  // entry with the smallest Position.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Return the signed byte offset encoded in the memory
/// operand of MI, decoding the addressing-mode-specific immediate field.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  // The offset immediate is always the third-from-last operand (it is
  // followed by the two predicate operands).
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes store a plain (already signed/byte-scaled) immediate.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
    return OffField;

  // AM3 (ldrd/strd) encodes a byte offset; AM5 (vldr/vstr) encodes a word
  // offset, hence the *4. Both carry a separate add/sub bit.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
    : ARM_AM::getAM5Offset(OffField) * 4;
  if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single LDR or STR (opcode NewOpc) before MBBI,
/// carrying over the given register flags.
/// NOTE(review): the dl, OffKill and isT2 parameters are currently unused in
/// this body (the debug location is taken from MBBI instead) -- confirm
/// whether that is intentional.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int Offset, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  if (isDef) {
    // Load: the data register is a def (possibly dead).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    // Store: the data register is a use (possibly killed/undef).
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

/// FixInvalidRegPairOp - LDRD/STRD require an even/odd register pair. If the
/// allocated registers don't form such a pair, rewrite the instruction either
/// as a 2-register LDM/STM (when register numbers ascend and the offset is 0)
/// or as two separate LDR/STR instructions. Returns true if MI was replaced
/// (MBBI is updated to the first replacement instruction).
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // Even/odd consecutive pair: the instruction is already valid.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // On a load a clobbered source is "dead"; on a store it is "killed".
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 LDRD/STRD has no separate offset-register operand.
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (base+4) half first so the base survives for it.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State of the chain currently being accumulated: base register, opcode,
  // transfer size and predicate must all match for ops to join the chain.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Fix invalid LDRD/STRD register pairs first; MBBI is updated in place.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Keep the queue sorted by offset: insert before the first entry
            // with a larger offset.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (MBBI->isDebugValue()) {
      // Skip debug values without breaking the chain or bumping Position.
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state for the next run of mergeable ops.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  // Strict-weak ordering on memory ops: sorts by *descending* byte offset.
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      // Callers guarantee no two distinct ops share an offset.
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register in the LDM's register list.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite lr -> pc in the LDM and delete the now-redundant return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Run the post-RA load/store optimizations on every
/// basic block of Fn. Returns true if anything changed.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that moves
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Check whether Op0/Op1 can be combined into one LDRD/STRD; on success
    // fills in all the out-parameters describing the new instruction.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Move consecutive same-base loads (or stores) in Ops next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

/// runOnMachineFunction - Run the pre-RA rescheduling on every basic block.
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD  = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF  = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Return true if the memory ops in MemOps can be
/// moved next to each other across the instructions in (I, E) without
/// changing behavior and without an excessive register-pressure increase.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Any redefinition of the base register kills the transformation.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          int &Offset, unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STRi12)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    // ARM LDRD/STRD use addrmode3: magnitude plus a separate add/sub bit.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  // LDRD/STRD need two distinct registers.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

/// CopyMMO - Clone MMO with its size overridden to NewSize (used when two
/// 4-byte accesses are fused into one 8-byte LDRD/STRD access).
static MachineMemOperand *CopyMMO(const MachineMemOperand *MMO,
                                  unsigned NewSize, MachineFunction *MF) {
  return MF->getMachineMemOperand(MachinePointerInfo(MMO->getValue(),
                                                     MMO->getOffset()),
                                  MMO->getFlags(), NewSize,
                                  MMO->getAlignment(), MMO->getTBAAInfo());
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops back-to-front, i.e. in ascending offset order, collecting a
    // run of same-opcode, gap-free accesses.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);

            // Copy memoperands but change size to 8.
            for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
                 mmo != Op0->memoperands_end(); ++mmo)
              MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            // Copy memoperands but change size to 8.
            for (MachineInstr::mmo_iterator mmo = Op0->memoperands_begin();
                 mmo != Op0->memoperands_end(); ++mmo)
              MIB.addMemOperand(CopyMMO(*mmo, 8, MF));
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pairing possible: just move the run next to InsertPos.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Map from each memory op to its position (Loc) within the current window,
  // and from each base register to the loads/stores that use it.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // Start a fresh window after a barrier or duplicate base+offset.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}