ARMLoadStoreOptimizer.cpp revision 2d357f6b44159c59dbb58e03a22f94312696d064
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
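/// For example (illustrative; actual registers depend on the allocator):
///
///   ldr r0, [r4]
///   ldr r1, [r4, #4]     =>     ldmia r4, {r0, r1, r2}
///   ldr r2, [r4, #8]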

namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::VLDRS:
    NumVLDMGened++;
    return ARM::VLDMS;
  case ARM::VSTRS:
    NumVSTMGened++;
    return ARM::VSTMS;
  case ARM::VLDRD:
    NumVLDMGened++;
    return ARM::VLDMD;
  case ARM::VSTRD:
    NumVSTMGened++;
    return ARM::VSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
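// Kill flags need care here: a kill flag on a merged register is transferred
// from (and cleared on) any unmerged op that remains before the insertion
// point, so the merged instruction carries the final kill.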
void ARMLoadStoreOpt::
MergeOpsUpdate(MachineBasicBlock &MBB,
               MemOpQueue &memOps,
               unsigned memOpsBegin,
               unsigned memOpsEnd,
               unsigned insertAfter,
               int Offset,
               unsigned Base,
               bool BaseKill,
               int Opcode,
               ARMCC::CondCodes Pred,
               unsigned PredReg,
               unsigned Scratch,
               DebugLoc dl,
               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  const unsigned insertPos = memOps[insertAfter].Position;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    bool isKill = MO.isKill();

    // If we are inserting the merged operation after an unmerged operation
    // that uses the same register, make sure to transfer any kill flag.
    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
      if (memOps[j].Position < insertPos) {
        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
        if (MOJ.getReg() == Reg && MOJ.isKill())
          isKill = true;
      }

    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  Loc++;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any unmerged memops that come before insertPos.
    if (Regs[i-memOpsBegin].second)
      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
        if (memOps[j].Position < insertPos) {
          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
            MOJ.setIsKill(false);
        }
    MBB.erase(memOps[i].MBBI);
    memOps[i].Merged = true;
  }
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned insertAfter = SIndex;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : ARMRegisterInfo::getRegisterNumbering(PReg);

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : ARMRegisterInfo::getRegisterNumbering(Reg);
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
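      // Flush what has been collected so far, then recurse on the rest of
      // the queue starting at this incompatible op.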
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::VLDMS:
  case ARM::VSTMS:
  case ARM::VLDMD:
  case ARM::VSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDM: return ARM::LDM_UPD;
  case ARM::STM: return ARM::STM_UPD;
  case ARM::t2LDM: return ARM::t2LDM_UPD;
  case ARM::t2STM: return ARM::t2STM_UPD;
  case ARM::VLDMS: return ARM::VLDMS_UPD;
  case ARM::VLDMD: return ARM::VLDMD_UPD;
  case ARM::VSTMS: return ARM::VSTMS_UPD;
  case ARM::VSTMD: return ARM::VSTMD_UPD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
                Opcode == ARM::STM || Opcode == ARM::t2STM);

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  unsigned Offset = 0;

  if (isAM4) {
    // Can't use an updating ld/st if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }
    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
  } else {
    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
  }

  // Try merging with the previous instruction.
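  // If the preceding instruction subtracted exactly the transfer size from
  // the base, the same addresses are covered by switching ia -> db (or
  // ib -> da) with writeback, and the subtract can be deleted.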
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isAM4) {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::db;
      } else if (isAM4 && Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
        Mode = ARM_AM::da;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        Mode = ARM_AM::db;
        DoMerge = true;
      }
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (isAM4) {
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    } else {
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        DoMerge = true;
      }
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill));
  if (isAM4) {
    // [t2]LDM_UPD, [t2]STM_UPD
    MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
      .addImm(Pred).addReg(PredReg);
  } else {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
      .addImm(Pred).addReg(PredReg);
  }
  // Transfer the rest of the operands.
  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));
  // Transfer memoperands.
  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::VLDRS: return ARM::VLDMS_UPD;
  case ARM::VLDRD: return ARM::VLDMD_UPD;
  case ARM::VSTRS: return ARM::VSTMS_UPD;
  case ARM::VSTRD: return ARM::VSTMD_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction.
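  // A trailing increment can fold into a post-indexed form. A trailing
  // decrement can too, except for AM5 (VLDR / VSTR), which has no
  // post-decrement encoding here.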
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
                               (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  if (MI->hasOneMemOperand()) {
    const MachineMemOperand *MMO = *MI->memoperands_begin();

    // Don't touch volatile memory accesses - we may be changing their order.
    if (MMO->isVolatile())
      return false;

    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    // not.
    if (MMO->getAlignment() < 4)
      return false;
  }

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
      MI->getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
      MI->getOperand(1).isUndef())
    return false;

  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::VLDRS:
  case ARM::VSTRS:
    return MI->getOperand(1).isReg();
  case ARM::VLDRD:
  case ARM::VSTRD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
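/// Candidates are collected into a queue while they share the same base
/// register, opcode, and predicate; when the chain breaks (or the block
/// ends), queued ops with consecutive offsets are merged.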
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reached the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
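///
/// For example (illustrative):
///   ldr r0, [r4]               ldr r0, [r4]
///   add r7, r7, #1      =>     ldr r1, [r4, #4]
///   ldr r1, [r4, #4]           add r7, r7, #1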

namespace {
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate the register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                        SmallVector<MachineInstr*, 4> &Ops,
                                        unsigned Base, bool isLd,
                                        DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
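  // A gap in the offsets, a change of opcode, or a change of transfer size
  // ends the current run, since the ops moved together must form one
  // contiguous, same-width sequence.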
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}