ARMLoadStoreOptimizer.cpp revision d089a7ac70397ea41fe6128639dc54b8e273ed60
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-ldst-opt" 16#include "ARM.h" 17#include "ARMAddressingModes.h" 18#include "ARMBaseInstrInfo.h" 19#include "ARMMachineFunctionInfo.h" 20#include "ARMRegisterInfo.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/Function.h" 23#include "llvm/CodeGen/MachineBasicBlock.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstr.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/MachineRegisterInfo.h" 28#include "llvm/CodeGen/RegisterScavenging.h" 29#include "llvm/Target/TargetData.h" 30#include "llvm/Target/TargetInstrInfo.h" 31#include "llvm/Target/TargetMachine.h" 32#include "llvm/Target/TargetRegisterInfo.h" 33#include "llvm/Support/ErrorHandling.h" 34#include "llvm/ADT/DenseMap.h" 35#include "llvm/ADT/STLExtras.h" 36#include "llvm/ADT/SmallPtrSet.h" 37#include "llvm/ADT/SmallSet.h" 38#include "llvm/ADT/SmallVector.h" 39#include "llvm/ADT/Statistic.h" 40using namespace llvm; 41 42STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 43STATISTIC(NumSTMGened , "Number of stm instructions generated"); 44STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); 45STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); 46STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); 47STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); 48STATISTIC(NumSTRDFormed,"Number of strd 
created before allocation"); 49STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm"); 50STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm"); 51STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's"); 52STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); 53 54/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine 55/// load / store instructions to form ldm / stm instructions. 56 57namespace { 58 struct ARMLoadStoreOpt : public MachineFunctionPass { 59 static char ID; 60 ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} 61 62 const TargetInstrInfo *TII; 63 const TargetRegisterInfo *TRI; 64 ARMFunctionInfo *AFI; 65 RegScavenger *RS; 66 bool isThumb2; 67 68 virtual bool runOnMachineFunction(MachineFunction &Fn); 69 70 virtual const char *getPassName() const { 71 return "ARM load / store optimization pass"; 72 } 73 74 private: 75 struct MemOpQueueEntry { 76 int Offset; 77 unsigned Position; 78 MachineBasicBlock::iterator MBBI; 79 bool Merged; 80 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) 81 : Offset(o), Position(p), MBBI(i), Merged(false) {} 82 }; 83 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 84 typedef MemOpQueue::iterator MemOpQueueIter; 85 86 bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 87 int Offset, unsigned Base, bool BaseKill, int Opcode, 88 ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, 89 DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); 90 void MergeOpsUpdate(MachineBasicBlock &MBB, 91 MemOpQueue &MemOps, 92 unsigned memOpsBegin, 93 unsigned memOpsEnd, 94 unsigned insertAfter, 95 int Offset, 96 unsigned Base, 97 bool BaseKill, 98 int Opcode, 99 ARMCC::CondCodes Pred, 100 unsigned PredReg, 101 unsigned Scratch, 102 DebugLoc dl, 103 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 104 void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 105 int Opcode, 
unsigned Size, 106 ARMCC::CondCodes Pred, unsigned PredReg, 107 unsigned Scratch, MemOpQueue &MemOps, 108 SmallVector<MachineBasicBlock::iterator, 4> &Merges); 109 110 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 111 bool FixInvalidRegPairOp(MachineBasicBlock &MBB, 112 MachineBasicBlock::iterator &MBBI); 113 bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 114 MachineBasicBlock::iterator MBBI, 115 const TargetInstrInfo *TII, 116 bool &Advance, 117 MachineBasicBlock::iterator &I); 118 bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 119 MachineBasicBlock::iterator MBBI, 120 bool &Advance, 121 MachineBasicBlock::iterator &I); 122 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 123 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 124 }; 125 char ARMLoadStoreOpt::ID = 0; 126} 127 128static int getLoadStoreMultipleOpcode(int Opcode) { 129 switch (Opcode) { 130 case ARM::LDR: 131 NumLDMGened++; 132 return ARM::LDM; 133 case ARM::STR: 134 NumSTMGened++; 135 return ARM::STM; 136 case ARM::t2LDRi8: 137 case ARM::t2LDRi12: 138 NumLDMGened++; 139 return ARM::t2LDM; 140 case ARM::t2STRi8: 141 case ARM::t2STRi12: 142 NumSTMGened++; 143 return ARM::t2STM; 144 case ARM::VLDRS: 145 NumVLDMGened++; 146 return ARM::VLDMS; 147 case ARM::VSTRS: 148 NumVSTMGened++; 149 return ARM::VSTMS; 150 case ARM::VLDRD: 151 NumVLDMGened++; 152 return ARM::VLDMD; 153 case ARM::VSTRD: 154 NumVSTMGened++; 155 return ARM::VSTMD; 156 default: llvm_unreachable("Unhandled opcode!"); 157 } 158 return 0; 159} 160 161static bool isT2i32Load(unsigned Opc) { 162 return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; 163} 164 165static bool isi32Load(unsigned Opc) { 166 return Opc == ARM::LDR || isT2i32Load(Opc); 167} 168 169static bool isT2i32Store(unsigned Opc) { 170 return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; 171} 172 173static bool isi32Store(unsigned Opc) { 174 return Opc == ARM::STR || isT2i32Store(Opc); 175} 176 177/// MergeOps - Create and insert a LDM or STM with 
Base as base register and 178/// registers in Regs as the register operands that would be loaded / stored. 179/// It returns true if the transformation is done. 180bool 181ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, 182 MachineBasicBlock::iterator MBBI, 183 int Offset, unsigned Base, bool BaseKill, 184 int Opcode, ARMCC::CondCodes Pred, 185 unsigned PredReg, unsigned Scratch, DebugLoc dl, 186 SmallVector<std::pair<unsigned, bool>, 8> &Regs) { 187 // Only a single register to load / store. Don't bother. 188 unsigned NumRegs = Regs.size(); 189 if (NumRegs <= 1) 190 return false; 191 192 ARM_AM::AMSubMode Mode = ARM_AM::ia; 193 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); 194 if (isAM4 && Offset == 4) { 195 if (isThumb2) 196 // Thumb2 does not support ldmib / stmib. 197 return false; 198 Mode = ARM_AM::ib; 199 } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { 200 if (isThumb2) 201 // Thumb2 does not support ldmda / stmda. 202 return false; 203 Mode = ARM_AM::da; 204 } else if (isAM4 && Offset == -4 * (int)NumRegs) { 205 Mode = ARM_AM::db; 206 } else if (Offset != 0) { 207 // If starting offset isn't zero, insert a MI to materialize a new base. 208 // But only do so if it is cost effective, i.e. merging more than two 209 // loads / stores. 210 if (NumRegs <= 2) 211 return false; 212 213 unsigned NewBase; 214 if (isi32Load(Opcode)) 215 // If it is a load, then just use one of the destination register to 216 // use as the new base. 217 NewBase = Regs[NumRegs-1].first; 218 else { 219 // Use the scratch register to use as a new base. 220 NewBase = Scratch; 221 if (NewBase == 0) 222 return false; 223 } 224 int BaseOpc = !isThumb2 225 ? ARM::ADDri 226 : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); 227 if (Offset < 0) { 228 BaseOpc = !isThumb2 229 ? ARM::SUBri 230 : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); 231 Offset = - Offset; 232 } 233 int ImmedOffset = isThumb2 234 ? 
ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); 235 if (ImmedOffset == -1) 236 // FIXME: Try t2ADDri12 or t2SUBri12? 237 return false; // Probably not worth it then. 238 239 BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) 240 .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) 241 .addImm(Pred).addReg(PredReg).addReg(0); 242 Base = NewBase; 243 BaseKill = true; // New base is always killed right its use. 244 } 245 246 bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD); 247 bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || 248 Opcode == ARM::VLDRD); 249 Opcode = getLoadStoreMultipleOpcode(Opcode); 250 MachineInstrBuilder MIB = (isAM4) 251 ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 252 .addReg(Base, getKillRegState(BaseKill)) 253 .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) 254 : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) 255 .addReg(Base, getKillRegState(BaseKill)) 256 .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs)) 257 .addImm(Pred).addReg(PredReg); 258 for (unsigned i = 0; i != NumRegs; ++i) 259 MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) 260 | getKillRegState(Regs[i].second)); 261 262 return true; 263} 264 265// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on 266// success. 267void ARMLoadStoreOpt:: 268MergeOpsUpdate(MachineBasicBlock &MBB, 269 MemOpQueue &memOps, 270 unsigned memOpsBegin, 271 unsigned memOpsEnd, 272 unsigned insertAfter, 273 int Offset, 274 unsigned Base, 275 bool BaseKill, 276 int Opcode, 277 ARMCC::CondCodes Pred, 278 unsigned PredReg, 279 unsigned Scratch, 280 DebugLoc dl, 281 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 282 // First calculate which of the registers should be killed by the merged 283 // instruction. 
284 SmallVector<std::pair<unsigned, bool>, 8> Regs; 285 const unsigned insertPos = memOps[insertAfter].Position; 286 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 287 const MachineOperand &MO = memOps[i].MBBI->getOperand(0); 288 unsigned Reg = MO.getReg(); 289 bool isKill = MO.isKill(); 290 291 // If we are inserting the merged operation after an unmerged operation that 292 // uses the same register, make sure to transfer any kill flag. 293 for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) 294 if (memOps[j].Position<insertPos) { 295 const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); 296 if (MOJ.getReg() == Reg && MOJ.isKill()) 297 isKill = true; 298 } 299 300 Regs.push_back(std::make_pair(Reg, isKill)); 301 } 302 303 // Try to do the merge. 304 MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; 305 Loc++; 306 if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, 307 Pred, PredReg, Scratch, dl, Regs)) 308 return; 309 310 // Merge succeeded, update records. 311 Merges.push_back(prior(Loc)); 312 for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { 313 // Remove kill flags from any unmerged memops that come before insertPos. 314 if (Regs[i-memOpsBegin].second) 315 for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) 316 if (memOps[j].Position<insertPos) { 317 MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); 318 if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) 319 MOJ.setIsKill(false); 320 } 321 MBB.erase(memOps[i].MBBI); 322 memOps[i].Merged = true; 323 } 324} 325 326/// MergeLDR_STR - Merge a number of load / store instructions into one or more 327/// load / store multiple instructions. 
328void 329ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 330 unsigned Base, int Opcode, unsigned Size, 331 ARMCC::CondCodes Pred, unsigned PredReg, 332 unsigned Scratch, MemOpQueue &MemOps, 333 SmallVector<MachineBasicBlock::iterator, 4> &Merges) { 334 bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); 335 int Offset = MemOps[SIndex].Offset; 336 int SOffset = Offset; 337 unsigned insertAfter = SIndex; 338 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 339 DebugLoc dl = Loc->getDebugLoc(); 340 const MachineOperand &PMO = Loc->getOperand(0); 341 unsigned PReg = PMO.getReg(); 342 unsigned PRegNum = PMO.isUndef() ? UINT_MAX 343 : ARMRegisterInfo::getRegisterNumbering(PReg); 344 unsigned Count = 1; 345 346 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 347 int NewOffset = MemOps[i].Offset; 348 const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); 349 unsigned Reg = MO.getReg(); 350 unsigned RegNum = MO.isUndef() ? UINT_MAX 351 : ARMRegisterInfo::getRegisterNumbering(Reg); 352 // AM4 - register numbers in ascending order. 353 // AM5 - consecutive register numbers in ascending order. 354 // Can only do up to 16 double-word registers per insn. 355 if (Reg != ARM::SP && 356 NewOffset == Offset + (int)Size && 357 ((isAM4 && RegNum > PRegNum) 358 || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { 359 Offset += Size; 360 PRegNum = RegNum; 361 ++Count; 362 } else { 363 // Can't merge this in. Try merge the earlier ones first. 
364 MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, 365 Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); 366 MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, 367 MemOps, Merges); 368 return; 369 } 370 371 if (MemOps[i].Position > MemOps[insertAfter].Position) 372 insertAfter = i; 373 } 374 375 bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; 376 MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, 377 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); 378 return; 379} 380 381static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 382 unsigned Bytes, unsigned Limit, 383 ARMCC::CondCodes Pred, unsigned PredReg){ 384 unsigned MyPredReg = 0; 385 if (!MI) 386 return false; 387 if (MI->getOpcode() != ARM::t2SUBri && 388 MI->getOpcode() != ARM::t2SUBrSPi && 389 MI->getOpcode() != ARM::t2SUBrSPi12 && 390 MI->getOpcode() != ARM::tSUBspi && 391 MI->getOpcode() != ARM::SUBri) 392 return false; 393 394 // Make sure the offset fits in 8 bits. 395 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 396 return false; 397 398 unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME 399 return (MI->getOperand(0).getReg() == Base && 400 MI->getOperand(1).getReg() == Base && 401 (MI->getOperand(2).getImm()*Scale) == Bytes && 402 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 403 MyPredReg == PredReg); 404} 405 406static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 407 unsigned Bytes, unsigned Limit, 408 ARMCC::CondCodes Pred, unsigned PredReg){ 409 unsigned MyPredReg = 0; 410 if (!MI) 411 return false; 412 if (MI->getOpcode() != ARM::t2ADDri && 413 MI->getOpcode() != ARM::t2ADDrSPi && 414 MI->getOpcode() != ARM::t2ADDrSPi12 && 415 MI->getOpcode() != ARM::tADDspi && 416 MI->getOpcode() != ARM::ADDri) 417 return false; 418 419 if (Bytes <= 0 || (Limit && Bytes >= Limit)) 420 // Make sure the offset fits in 8 bits. 
421 return false; 422 423 unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME 424 return (MI->getOperand(0).getReg() == Base && 425 MI->getOperand(1).getReg() == Base && 426 (MI->getOperand(2).getImm()*Scale) == Bytes && 427 llvm::getInstrPredicate(MI, MyPredReg) == Pred && 428 MyPredReg == PredReg); 429} 430 431static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 432 switch (MI->getOpcode()) { 433 default: return 0; 434 case ARM::LDR: 435 case ARM::STR: 436 case ARM::t2LDRi8: 437 case ARM::t2LDRi12: 438 case ARM::t2STRi8: 439 case ARM::t2STRi12: 440 case ARM::VLDRS: 441 case ARM::VSTRS: 442 return 4; 443 case ARM::VLDRD: 444 case ARM::VSTRD: 445 return 8; 446 case ARM::LDM: 447 case ARM::STM: 448 case ARM::t2LDM: 449 case ARM::t2STM: 450 return (MI->getNumOperands() - 4) * 4; 451 case ARM::VLDMS: 452 case ARM::VSTMS: 453 case ARM::VLDMD: 454 case ARM::VSTMD: 455 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; 456 } 457} 458 459static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) { 460 switch (Opc) { 461 case ARM::LDM: return ARM::LDM_UPD; 462 case ARM::STM: return ARM::STM_UPD; 463 case ARM::t2LDM: return ARM::t2LDM_UPD; 464 case ARM::t2STM: return ARM::t2STM_UPD; 465 case ARM::VLDMS: return ARM::VLDMS_UPD; 466 case ARM::VLDMD: return ARM::VLDMD_UPD; 467 case ARM::VSTMS: return ARM::VSTMS_UPD; 468 case ARM::VSTMD: return ARM::VSTMD_UPD; 469 default: llvm_unreachable("Unhandled opcode!"); 470 } 471 return 0; 472} 473 474/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 475/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: 476/// 477/// stmia rn, <ra, rb, rc> 478/// rn := rn + 4 * 3; 479/// => 480/// stmia rn!, <ra, rb, rc> 481/// 482/// rn := rn - 4 * 3; 483/// ldmia rn, <ra, rb, rc> 484/// => 485/// ldmdb rn!, <ra, rb, rc> 486bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 487 MachineBasicBlock::iterator MBBI, 488 bool &Advance, 489 
MachineBasicBlock::iterator &I) { 490 MachineInstr *MI = MBBI; 491 unsigned Base = MI->getOperand(0).getReg(); 492 bool BaseKill = MI->getOperand(0).isKill(); 493 unsigned Bytes = getLSMultipleTransferSize(MI); 494 unsigned PredReg = 0; 495 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 496 int Opcode = MI->getOpcode(); 497 DebugLoc dl = MI->getDebugLoc(); 498 bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM || 499 Opcode == ARM::STM || Opcode == ARM::t2STM); 500 501 bool DoMerge = false; 502 ARM_AM::AMSubMode Mode = ARM_AM::ia; 503 unsigned Offset = 0; 504 505 if (isAM4) { 506 // Can't use an updating ld/st if the base register is also a dest 507 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 508 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { 509 if (MI->getOperand(i).getReg() == Base) 510 return false; 511 } 512 Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 513 } else { 514 // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. 515 Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 516 Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 517 } 518 519 // Try merging with the previous instruction. 520 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 521 if (MBBI != BeginMBBI) { 522 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 523 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 524 --PrevMBBI; 525 if (isAM4) { 526 if (Mode == ARM_AM::ia && 527 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 528 DoMerge = true; 529 Mode = ARM_AM::db; 530 } else if (isAM4 && Mode == ARM_AM::ib && 531 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 532 DoMerge = true; 533 Mode = ARM_AM::da; 534 } 535 } else { 536 if (Mode == ARM_AM::ia && 537 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { 538 Mode = ARM_AM::db; 539 DoMerge = true; 540 } 541 } 542 if (DoMerge) 543 MBB.erase(PrevMBBI); 544 } 545 546 // Try merging with the next instruction. 
547 MachineBasicBlock::iterator EndMBBI = MBB.end(); 548 if (!DoMerge && MBBI != EndMBBI) { 549 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 550 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 551 ++NextMBBI; 552 if (isAM4) { 553 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 554 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 555 DoMerge = true; 556 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 557 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 558 DoMerge = true; 559 } 560 } else { 561 if (Mode == ARM_AM::ia && 562 isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { 563 DoMerge = true; 564 } 565 } 566 if (DoMerge) { 567 if (NextMBBI == I) { 568 Advance = true; 569 ++I; 570 } 571 MBB.erase(NextMBBI); 572 } 573 } 574 575 if (!DoMerge) 576 return false; 577 578 unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode); 579 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 580 .addReg(Base, getDefRegState(true)) // WB base register 581 .addReg(Base, getKillRegState(BaseKill)); 582 if (isAM4) { 583 // [t2]LDM_UPD, [t2]STM_UPD 584 MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) 585 .addImm(Pred).addReg(PredReg); 586 } else { 587 // VLDM[SD}_UPD, VSTM[SD]_UPD 588 MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) 589 .addImm(Pred).addReg(PredReg); 590 } 591 // Transfer the rest of operands. 592 for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum) 593 MIB.addOperand(MI->getOperand(OpNum)); 594 // Transfer memoperands. 
595 (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); 596 597 MBB.erase(MBBI); 598 return true; 599} 600 601static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 602 switch (Opc) { 603 case ARM::LDR: return ARM::LDR_PRE; 604 case ARM::STR: return ARM::STR_PRE; 605 case ARM::VLDRS: return ARM::VLDMS_UPD; 606 case ARM::VLDRD: return ARM::VLDMD_UPD; 607 case ARM::VSTRS: return ARM::VSTMS_UPD; 608 case ARM::VSTRD: return ARM::VSTMD_UPD; 609 case ARM::t2LDRi8: 610 case ARM::t2LDRi12: 611 return ARM::t2LDR_PRE; 612 case ARM::t2STRi8: 613 case ARM::t2STRi12: 614 return ARM::t2STR_PRE; 615 default: llvm_unreachable("Unhandled opcode!"); 616 } 617 return 0; 618} 619 620static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 621 switch (Opc) { 622 case ARM::LDR: return ARM::LDR_POST; 623 case ARM::STR: return ARM::STR_POST; 624 case ARM::VLDRS: return ARM::VLDMS_UPD; 625 case ARM::VLDRD: return ARM::VLDMD_UPD; 626 case ARM::VSTRS: return ARM::VSTMS_UPD; 627 case ARM::VSTRD: return ARM::VSTMD_UPD; 628 case ARM::t2LDRi8: 629 case ARM::t2LDRi12: 630 return ARM::t2LDR_POST; 631 case ARM::t2STRi8: 632 case ARM::t2STRi12: 633 return ARM::t2STR_POST; 634 default: llvm_unreachable("Unhandled opcode!"); 635 } 636 return 0; 637} 638 639/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 640/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: 641bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 642 MachineBasicBlock::iterator MBBI, 643 const TargetInstrInfo *TII, 644 bool &Advance, 645 MachineBasicBlock::iterator &I) { 646 MachineInstr *MI = MBBI; 647 unsigned Base = MI->getOperand(1).getReg(); 648 bool BaseKill = MI->getOperand(1).isKill(); 649 unsigned Bytes = getLSMultipleTransferSize(MI); 650 int Opcode = MI->getOpcode(); 651 DebugLoc dl = MI->getDebugLoc(); 652 bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || 653 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); 654 bool isAM2 = (Opcode 
== ARM::LDR || Opcode == ARM::STR); 655 if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) 656 return false; 657 if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) 658 return false; 659 if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) 660 if (MI->getOperand(2).getImm() != 0) 661 return false; 662 663 bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; 664 // Can't do the merge if the destination register is the same as the would-be 665 // writeback register. 666 if (isLd && MI->getOperand(0).getReg() == Base) 667 return false; 668 669 unsigned PredReg = 0; 670 ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); 671 bool DoMerge = false; 672 ARM_AM::AddrOpc AddSub = ARM_AM::add; 673 unsigned NewOpc = 0; 674 // AM2 - 12 bits, thumb2 - 8 bits. 675 unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); 676 677 // Try merging with the previous instruction. 678 MachineBasicBlock::iterator BeginMBBI = MBB.begin(); 679 if (MBBI != BeginMBBI) { 680 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 681 while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) 682 --PrevMBBI; 683 if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { 684 DoMerge = true; 685 AddSub = ARM_AM::sub; 686 } else if (!isAM5 && 687 isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { 688 DoMerge = true; 689 } 690 if (DoMerge) { 691 NewOpc = getPreIndexedLoadStoreOpcode(Opcode); 692 MBB.erase(PrevMBBI); 693 } 694 } 695 696 // Try merging with the next instruction. 
697 MachineBasicBlock::iterator EndMBBI = MBB.begin(); 698 if (!DoMerge && MBBI != EndMBBI) { 699 MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); 700 while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) 701 ++NextMBBI; 702 if (!isAM5 && 703 isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { 704 DoMerge = true; 705 AddSub = ARM_AM::sub; 706 } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { 707 DoMerge = true; 708 } 709 if (DoMerge) { 710 NewOpc = getPostIndexedLoadStoreOpcode(Opcode); 711 if (NextMBBI == I) { 712 Advance = true; 713 ++I; 714 } 715 MBB.erase(NextMBBI); 716 } 717 } 718 719 if (!DoMerge) 720 return false; 721 722 bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; 723 unsigned Offset = 0; 724 if (isAM5) 725 Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, 726 (isDPR ? 2 : 1)); 727 else if (isAM2) 728 Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); 729 else 730 Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; 731 732 if (isAM5) { 733 // VLDM[SD}_UPD, VSTM[SD]_UPD 734 MachineOperand &MO = MI->getOperand(0); 735 BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) 736 .addReg(Base, getDefRegState(true)) // WB base register 737 .addReg(Base, getKillRegState(isLd ? BaseKill : false)) 738 .addImm(Offset) 739 .addImm(Pred).addReg(PredReg) 740 .addReg(MO.getReg(), (isLd ? 
getDefRegState(true) : 741 getKillRegState(MO.isKill()))); 742 } else if (isLd) { 743 if (isAM2) 744 // LDR_PRE, LDR_POST, 745 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 746 .addReg(Base, RegState::Define) 747 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 748 else 749 // t2LDR_PRE, t2LDR_POST 750 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) 751 .addReg(Base, RegState::Define) 752 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 753 } else { 754 MachineOperand &MO = MI->getOperand(0); 755 if (isAM2) 756 // STR_PRE, STR_POST 757 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 758 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 759 .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); 760 else 761 // t2STR_PRE, t2STR_POST 762 BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) 763 .addReg(MO.getReg(), getKillRegState(MO.isKill())) 764 .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); 765 } 766 MBB.erase(MBBI); 767 768 return true; 769} 770 771/// isMemoryOp - Returns true if instruction is a memory operations (that this 772/// pass is capable of operating on). 773static bool isMemoryOp(const MachineInstr *MI) { 774 if (MI->hasOneMemOperand()) { 775 const MachineMemOperand *MMO = *MI->memoperands_begin(); 776 777 // Don't touch volatile memory accesses - we may be changing their order. 778 if (MMO->isVolatile()) 779 return false; 780 781 // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is 782 // not. 783 if (MMO->getAlignment() < 4) 784 return false; 785 } 786 787 // str <undef> could probably be eliminated entirely, but for now we just want 788 // to avoid making a mess of it. 789 // FIXME: Use str <undef> as a wildcard to enable better stm folding. 790 if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && 791 MI->getOperand(0).isUndef()) 792 return false; 793 794 // Likewise don't mess with references to undefined addresses. 
795 if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && 796 MI->getOperand(1).isUndef()) 797 return false; 798 799 int Opcode = MI->getOpcode(); 800 switch (Opcode) { 801 default: break; 802 case ARM::LDR: 803 case ARM::STR: 804 return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; 805 case ARM::VLDRS: 806 case ARM::VSTRS: 807 return MI->getOperand(1).isReg(); 808 case ARM::VLDRD: 809 case ARM::VSTRD: 810 return MI->getOperand(1).isReg(); 811 case ARM::t2LDRi8: 812 case ARM::t2LDRi12: 813 case ARM::t2STRi8: 814 case ARM::t2STRi12: 815 return MI->getOperand(1).isReg(); 816 } 817 return false; 818} 819 820/// AdvanceRS - Advance register scavenger to just before the earliest memory 821/// op that is being merged. 822void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 823 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 824 unsigned Position = MemOps[0].Position; 825 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 826 if (MemOps[i].Position < Position) { 827 Position = MemOps[i].Position; 828 Loc = MemOps[i].MBBI; 829 } 830 } 831 832 if (Loc != MBB.begin()) 833 RS->forward(prior(Loc)); 834} 835 836static int getMemoryOpOffset(const MachineInstr *MI) { 837 int Opcode = MI->getOpcode(); 838 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; 839 bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; 840 unsigned NumOperands = MI->getDesc().getNumOperands(); 841 unsigned OffField = MI->getOperand(NumOperands-3).getImm(); 842 843 if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || 844 Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || 845 Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) 846 return OffField; 847 848 int Offset = isAM2 849 ? ARM_AM::getAM2Offset(OffField) 850 : (isAM3 ? 
ARM_AM::getAM3Offset(OffField) 851 : ARM_AM::getAM5Offset(OffField) * 4); 852 if (isAM2) { 853 if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) 854 Offset = -Offset; 855 } else if (isAM3) { 856 if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) 857 Offset = -Offset; 858 } else { 859 if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) 860 Offset = -Offset; 861 } 862 return Offset; 863} 864 865static void InsertLDR_STR(MachineBasicBlock &MBB, 866 MachineBasicBlock::iterator &MBBI, 867 int OffImm, bool isDef, 868 DebugLoc dl, unsigned NewOpc, 869 unsigned Reg, bool RegDeadKill, bool RegUndef, 870 unsigned BaseReg, bool BaseKill, bool BaseUndef, 871 unsigned OffReg, bool OffKill, bool OffUndef, 872 ARMCC::CondCodes Pred, unsigned PredReg, 873 const TargetInstrInfo *TII, bool isT2) { 874 int Offset = OffImm; 875 if (!isT2) { 876 if (OffImm < 0) 877 Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); 878 else 879 Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); 880 } 881 if (isDef) { 882 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 883 TII->get(NewOpc)) 884 .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) 885 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 886 if (!isT2) 887 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 888 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 889 } else { 890 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 891 TII->get(NewOpc)) 892 .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) 893 .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); 894 if (!isT2) 895 MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); 896 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 897 } 898} 899 900bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, 901 MachineBasicBlock::iterator &MBBI) { 902 MachineInstr *MI = &*MBBI; 903 unsigned Opcode = MI->getOpcode(); 
  // Only LDRD / STRD (ARM or Thumb2) can carry an invalid register pair.
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // (even, even+1) is exactly what LDRD / STRD require; nothing to fix.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the data operands are defs (dead?); for stores, uses (kill?).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Thumb2 LDRD / STRD have no register-offset operand; use 0 as "none".
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        // Emit the odd (higher-addressed) half first so the even destination
        // cannot clobber the base / offset register before it is read.
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Step back so the caller's loop visits the newly inserted instructions,
    // then erase the original pair op.
    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // State describing the chain of mergeable memory ops currently being built:
  // same base register, same opcode, same predicate.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            // Keep the queue sorted by offset: insert before the first entry
            // with a larger offset.  (Note: this E shadows the block-end
            // iterator above.)
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII, Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state so a new chain can start at the next mem op.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  // Orders memory instructions by DESCENDING offset; asserts that two
  // distinct instructions never share an offset (callers weed those out).
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restores the value of LR into pc.
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      // The register list is the trailing operands; LR must be the last
      // (highest) register popped for the rewrite to be valid.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      // Pop directly into PC and drop the now-redundant return instruction.
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  // Scavenger is used by LoadStoreMultipleOpti to find scratch registers.
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    // Cached target/function info, initialized in runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // Checks whether Op0/Op1 can be combined into one LDRD / STRD; on
    // success fills in the out-parameters describing the new instruction.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // Moves consecutive same-base loads (or stores) next to each other.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

/// IsSafeAndProfitableToMove - Decide whether the memory ops in MemOps
/// (all using base register Base) can be moved next to each other across
/// the instructions in (I, E).  MemRegs holds the data registers of the
/// ops being moved; used to estimate register-pressure impact.
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    // Instructions that are part of the group being moved don't block it.
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    // Moving loads across a store (or vice versa) could reorder a
    // potentially aliasing memory access.
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // A redefinition of the base invalidates the later addresses.
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  // Scale is the granularity of the encoded immediate: t2LDRD/t2STRD encode
  // the offset in words, the ARM forms in bytes.
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.  Bail if the access has no IR value or is volatile.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 need 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      // Offset must fit in 8 bits (scaled) and be Scale-aligned.
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    // ARM mode: re-encode the signed offset as addrmode3 (add/sub + mag).
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  // LDRD with identical destination registers is not useful / valid.
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
1429 while (Ops.size() > 1) { 1430 unsigned FirstLoc = ~0U; 1431 unsigned LastLoc = 0; 1432 MachineInstr *FirstOp = 0; 1433 MachineInstr *LastOp = 0; 1434 int LastOffset = 0; 1435 unsigned LastOpcode = 0; 1436 unsigned LastBytes = 0; 1437 unsigned NumMove = 0; 1438 for (int i = Ops.size() - 1; i >= 0; --i) { 1439 MachineInstr *Op = Ops[i]; 1440 unsigned Loc = MI2LocMap[Op]; 1441 if (Loc <= FirstLoc) { 1442 FirstLoc = Loc; 1443 FirstOp = Op; 1444 } 1445 if (Loc >= LastLoc) { 1446 LastLoc = Loc; 1447 LastOp = Op; 1448 } 1449 1450 unsigned Opcode = Op->getOpcode(); 1451 if (LastOpcode && Opcode != LastOpcode) 1452 break; 1453 1454 int Offset = getMemoryOpOffset(Op); 1455 unsigned Bytes = getLSMultipleTransferSize(Op); 1456 if (LastBytes) { 1457 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 1458 break; 1459 } 1460 LastOffset = Offset; 1461 LastBytes = Bytes; 1462 LastOpcode = Opcode; 1463 if (++NumMove == 8) // FIXME: Tune this limit. 1464 break; 1465 } 1466 1467 if (NumMove <= 1) 1468 Ops.pop_back(); 1469 else { 1470 SmallPtrSet<MachineInstr*, 4> MemOps; 1471 SmallSet<unsigned, 4> MemRegs; 1472 for (int i = NumMove-1; i >= 0; --i) { 1473 MemOps.insert(Ops[i]); 1474 MemRegs.insert(Ops[i]->getOperand(0).getReg()); 1475 } 1476 1477 // Be conservative, if the instructions are too far apart, don't 1478 // move them. We want to limit the increase of register pressure. 1479 bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this. 1480 if (DoMove) 1481 DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp, 1482 MemOps, MemRegs, TRI); 1483 if (!DoMove) { 1484 for (unsigned i = 0; i != NumMove; ++i) 1485 Ops.pop_back(); 1486 } else { 1487 // This is the new location for the loads / stores. 1488 MachineBasicBlock::iterator InsertPos = isLd ? 
FirstOp : LastOp; 1489 while (InsertPos != MBB->end() && MemOps.count(InsertPos)) 1490 ++InsertPos; 1491 1492 // If we are moving a pair of loads / stores, see if it makes sense 1493 // to try to allocate a pair of registers that can form register pairs. 1494 MachineInstr *Op0 = Ops.back(); 1495 MachineInstr *Op1 = Ops[Ops.size()-2]; 1496 unsigned EvenReg = 0, OddReg = 0; 1497 unsigned BaseReg = 0, OffReg = 0, PredReg = 0; 1498 ARMCC::CondCodes Pred = ARMCC::AL; 1499 bool isT2 = false; 1500 unsigned NewOpc = 0; 1501 int Offset = 0; 1502 DebugLoc dl; 1503 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 1504 EvenReg, OddReg, BaseReg, OffReg, 1505 Offset, PredReg, Pred, isT2)) { 1506 Ops.pop_back(); 1507 Ops.pop_back(); 1508 1509 // Form the pair instruction. 1510 if (isLd) { 1511 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1512 dl, TII->get(NewOpc)) 1513 .addReg(EvenReg, RegState::Define) 1514 .addReg(OddReg, RegState::Define) 1515 .addReg(BaseReg); 1516 if (!isT2) 1517 MIB.addReg(OffReg); 1518 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1519 ++NumLDRDFormed; 1520 } else { 1521 MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, 1522 dl, TII->get(NewOpc)) 1523 .addReg(EvenReg) 1524 .addReg(OddReg) 1525 .addReg(BaseReg); 1526 if (!isT2) 1527 MIB.addReg(OffReg); 1528 MIB.addImm(Offset).addImm(Pred).addReg(PredReg); 1529 ++NumSTRDFormed; 1530 } 1531 MBB->erase(Op0); 1532 MBB->erase(Op1); 1533 1534 // Add register allocation hints to form register pairs. 
1535 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 1536 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 1537 } else { 1538 for (unsigned i = 0; i != NumMove; ++i) { 1539 MachineInstr *Op = Ops.back(); 1540 Ops.pop_back(); 1541 MBB->splice(InsertPos, MBB, Op); 1542 } 1543 } 1544 1545 NumLdStMoved += NumMove; 1546 RetVal = true; 1547 } 1548 } 1549 } 1550 1551 return RetVal; 1552} 1553 1554bool 1555ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 1556 bool RetVal = false; 1557 1558 DenseMap<MachineInstr*, unsigned> MI2LocMap; 1559 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 1560 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 1561 SmallVector<unsigned, 4> LdBases; 1562 SmallVector<unsigned, 4> StBases; 1563 1564 unsigned Loc = 0; 1565 MachineBasicBlock::iterator MBBI = MBB->begin(); 1566 MachineBasicBlock::iterator E = MBB->end(); 1567 while (MBBI != E) { 1568 for (; MBBI != E; ++MBBI) { 1569 MachineInstr *MI = MBBI; 1570 const TargetInstrDesc &TID = MI->getDesc(); 1571 if (TID.isCall() || TID.isTerminator()) { 1572 // Stop at barriers. 
        // Skip the barrier itself; rescheduling resumes after it.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      // Only reschedule unpredicated memory ops.
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A repeated base+offset means an overlapping access; cut the
          // region here rather than mixing the two.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // If we stopped at a barrier (rather than the block end), reset the
    // per-region state and continue scanning the next region.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.  PreAlloc selects the pre-register-allocation
/// rescheduling pass instead of the post-allocation LDM/STM-forming pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}