ARMLoadStoreOptimizer.cpp revision f6fa5ee5c201f91217c3034ddcaecb63e7d8cd5a
1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Evan Cheng and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a pass that performs load / store related peephole 11// optimizations. This pass should be run after register allocation. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-ldst-opt" 16#include "ARM.h" 17#include "ARMAddressingModes.h" 18#include "ARMMachineFunctionInfo.h" 19#include "ARMRegisterInfo.h" 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/ADT/SmallVector.h" 22#include "llvm/ADT/Statistic.h" 23#include "llvm/CodeGen/MachineBasicBlock.h" 24#include "llvm/CodeGen/MachineFunctionPass.h" 25#include "llvm/CodeGen/MachineInstr.h" 26#include "llvm/CodeGen/MachineInstrBuilder.h" 27#include "llvm/CodeGen/RegisterScavenging.h" 28#include "llvm/Support/Compiler.h" 29#include "llvm/Target/MRegisterInfo.h" 30#include "llvm/Target/TargetInstrInfo.h" 31#include "llvm/Target/TargetMachine.h" 32using namespace llvm; 33 34STATISTIC(NumLDMGened , "Number of ldm instructions generated"); 35STATISTIC(NumSTMGened , "Number of stm instructions generated"); 36STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); 37STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); 38 39namespace { 40 struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { 41 const TargetInstrInfo *TII; 42 const MRegisterInfo *MRI; 43 ARMFunctionInfo *AFI; 44 RegScavenger *RS; 45 46 virtual bool runOnMachineFunction(MachineFunction &Fn); 47 48 virtual const char *getPassName() const { 49 return "ARM load / store optimization pass"; 50 } 51 52 private: 53 struct MemOpQueueEntry { 54 int Offset; 55 unsigned Position; 56 
MachineBasicBlock::iterator MBBI; 57 bool Merged; 58 MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) 59 : Offset(o), Position(p), MBBI(i), Merged(false) {}; 60 }; 61 typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; 62 typedef MemOpQueue::iterator MemOpQueueIter; 63 64 SmallVector<MachineBasicBlock::iterator, 4> 65 MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, 66 int Opcode, unsigned Size, unsigned Scratch, 67 MemOpQueue &MemOps); 68 69 void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); 70 bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); 71 bool MergeReturnIntoLDM(MachineBasicBlock &MBB); 72 }; 73} 74 75/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 76/// optimization pass. 77FunctionPass *llvm::createARMLoadStoreOptimizationPass() { 78 return new ARMLoadStoreOpt(); 79} 80 81static int getLoadStoreMultipleOpcode(int Opcode) { 82 switch (Opcode) { 83 case ARM::LDR: 84 NumLDMGened++; 85 return ARM::LDM; 86 case ARM::STR: 87 NumSTMGened++; 88 return ARM::STM; 89 case ARM::FLDS: 90 NumFLDMGened++; 91 return ARM::FLDMS; 92 case ARM::FSTS: 93 NumFSTMGened++; 94 return ARM::FSTMS; 95 case ARM::FLDD: 96 NumFLDMGened++; 97 return ARM::FLDMD; 98 case ARM::FSTD: 99 NumFSTMGened++; 100 return ARM::FSTMD; 101 default: abort(); 102 } 103 return 0; 104} 105 106/// mergeOps - Create and insert a LDM or STM with Base as base register and 107/// registers in Regs as the register operands that would be loaded / stored. 108/// It returns true if the transformation is done. 109static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 110 int Offset, unsigned Base, bool BaseKill, int Opcode, 111 unsigned Scratch, 112 SmallVector<std::pair<unsigned, bool>, 8> &Regs, 113 const TargetInstrInfo *TII) { 114 // Only a single register to load / store. Don't bother. 
115 unsigned NumRegs = Regs.size(); 116 if (NumRegs <= 1) 117 return false; 118 119 ARM_AM::AMSubMode Mode = ARM_AM::ia; 120 bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; 121 if (isAM4 && Offset == 4) 122 Mode = ARM_AM::ib; 123 else if (isAM4 && Offset == -4 * (int)NumRegs + 4) 124 Mode = ARM_AM::da; 125 else if (isAM4 && Offset == -4 * (int)NumRegs) 126 Mode = ARM_AM::db; 127 else if (Offset != 0) { 128 // If starting offset isn't zero, insert a MI to materialize a new base. 129 // But only do so if it is cost effective, i.e. merging more than two 130 // loads / stores. 131 if (NumRegs <= 2) 132 return false; 133 134 unsigned NewBase; 135 if (Opcode == ARM::LDR) 136 // If it is a load, then just use one of the destination register to 137 // use as the new base. 138 NewBase = Regs[NumRegs-1].first; 139 else { 140 // Use the scratch register to use as a new base. 141 NewBase = Scratch; 142 if (NewBase == 0) 143 return false; 144 } 145 int BaseOpc = ARM::ADDri; 146 if (Offset < 0) { 147 BaseOpc = ARM::SUBri; 148 Offset = - Offset; 149 } 150 int ImmedOffset = ARM_AM::getSOImmVal(Offset); 151 if (ImmedOffset == -1) 152 return false; // Probably not worth it then. 153 154 BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase) 155 .addReg(Base, false, false, BaseKill).addImm(ImmedOffset); 156 Base = NewBase; 157 BaseKill = true; // New base is always killed right its use. 158 } 159 160 bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; 161 bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; 162 Opcode = getLoadStoreMultipleOpcode(Opcode); 163 MachineInstrBuilder MIB = (isAM4) 164 ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill) 165 .addImm(ARM_AM::getAM4ModeImm(Mode)) 166 : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill) 167 .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? 
NumRegs<<1 : NumRegs)); 168 for (unsigned i = 0; i != NumRegs; ++i) 169 MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second); 170 171 return true; 172} 173 174/// MergeLDR_STR - Merge a number of load / store instructions into one or more 175/// load / store multiple instructions. 176SmallVector<MachineBasicBlock::iterator, 4> 177ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, 178 unsigned Base, int Opcode, unsigned Size, 179 unsigned Scratch, MemOpQueue &MemOps) { 180 SmallVector<MachineBasicBlock::iterator, 4> Merges; 181 SmallVector<std::pair<unsigned,bool>, 8> Regs; 182 bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; 183 int Offset = MemOps[SIndex].Offset; 184 int SOffset = Offset; 185 unsigned Pos = MemOps[SIndex].Position; 186 MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; 187 unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg(); 188 unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg); 189 bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill(); 190 Regs.push_back(std::make_pair(PReg, isKill)); 191 for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { 192 int NewOffset = MemOps[i].Offset; 193 unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg(); 194 unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); 195 isKill = MemOps[i].MBBI->getOperand(0).isKill(); 196 // AM4 - register numbers in ascending order. 197 // AM5 - consecutive register numbers in ascending order. 198 if (NewOffset == Offset + (int)Size && 199 ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { 200 Offset += Size; 201 Regs.push_back(std::make_pair(Reg, isKill)); 202 PRegNum = RegNum; 203 } else { 204 // Can't merge this in. Try merge the earlier ones first. 
205 if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode,Scratch,Regs,TII)) { 206 Merges.push_back(prior(Loc)); 207 for (unsigned j = SIndex; j < i; ++j) { 208 MBB.erase(MemOps[j].MBBI); 209 MemOps[j].Merged = true; 210 } 211 } 212 SmallVector<MachineBasicBlock::iterator, 4> Merges2 = 213 MergeLDR_STR(MBB, i, Base, Opcode, Size, Scratch, MemOps); 214 Merges.append(Merges2.begin(), Merges2.end()); 215 return Merges; 216 } 217 218 if (MemOps[i].Position > Pos) { 219 Pos = MemOps[i].Position; 220 Loc = MemOps[i].MBBI; 221 } 222 } 223 224 bool BaseKill = Loc->findRegisterUseOperand(Base, true) != -1; 225 if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode,Scratch,Regs, TII)) { 226 Merges.push_back(prior(Loc)); 227 for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) { 228 MBB.erase(MemOps[i].MBBI); 229 MemOps[i].Merged = true; 230 } 231 } 232 233 return Merges; 234} 235 236static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 237 unsigned Bytes) { 238 return (MI && MI->getOpcode() == ARM::SUBri && 239 MI->getOperand(0).getReg() == Base && 240 MI->getOperand(1).getReg() == Base && 241 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes); 242} 243 244static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 245 unsigned Bytes) { 246 return (MI && MI->getOpcode() == ARM::ADDri && 247 MI->getOperand(0).getReg() == Base && 248 MI->getOperand(1).getReg() == Base && 249 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes); 250} 251 252static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 253 switch (MI->getOpcode()) { 254 default: return 0; 255 case ARM::LDR: 256 case ARM::STR: 257 case ARM::FLDS: 258 case ARM::FSTS: 259 return 4; 260 case ARM::FLDD: 261 case ARM::FSTD: 262 return 8; 263 case ARM::LDM: 264 case ARM::STM: 265 return (MI->getNumOperands() - 2) * 4; 266 case ARM::FLDMS: 267 case ARM::FSTMS: 268 case ARM::FLDMD: 269 case ARM::FSTMD: 270 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 
4; 271 } 272} 273 274/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 275/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: 276/// 277/// stmia rn, <ra, rb, rc> 278/// rn := rn + 4 * 3; 279/// => 280/// stmia rn!, <ra, rb, rc> 281/// 282/// rn := rn - 4 * 3; 283/// ldmia rn, <ra, rb, rc> 284/// => 285/// ldmdb rn!, <ra, rb, rc> 286static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 287 MachineBasicBlock::iterator MBBI) { 288 MachineInstr *MI = MBBI; 289 unsigned Base = MI->getOperand(0).getReg(); 290 unsigned Bytes = getLSMultipleTransferSize(MI); 291 int Opcode = MI->getOpcode(); 292 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; 293 294 if (isAM4) { 295 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) 296 return false; 297 298 // Can't use the updating AM4 sub-mode if the base register is also a dest 299 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 300 for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i) { 301 if (MI->getOperand(i).getReg() == Base) 302 return false; 303 } 304 305 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 306 if (MBBI != MBB.begin()) { 307 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 308 if (Mode == ARM_AM::ia && 309 isMatchingDecrement(PrevMBBI, Base, Bytes)) { 310 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); 311 MBB.erase(PrevMBBI); 312 return true; 313 } else if (Mode == ARM_AM::ib && 314 isMatchingDecrement(PrevMBBI, Base, Bytes)) { 315 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); 316 MBB.erase(PrevMBBI); 317 return true; 318 } 319 } 320 321 if (MBBI != MBB.end()) { 322 MachineBasicBlock::iterator NextMBBI = next(MBBI); 323 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 324 isMatchingIncrement(NextMBBI, Base, Bytes)) { 325 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 326 MBB.erase(NextMBBI); 327 return true; 328 } else if ((Mode == ARM_AM::da || 
Mode == ARM_AM::db) && 329 isMatchingDecrement(NextMBBI, Base, Bytes)) { 330 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 331 MBB.erase(NextMBBI); 332 return true; 333 } 334 } 335 } else { 336 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. 337 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) 338 return false; 339 340 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 341 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 342 if (MBBI != MBB.begin()) { 343 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 344 if (Mode == ARM_AM::ia && 345 isMatchingDecrement(PrevMBBI, Base, Bytes)) { 346 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); 347 MBB.erase(PrevMBBI); 348 return true; 349 } 350 } 351 352 if (MBBI != MBB.end()) { 353 MachineBasicBlock::iterator NextMBBI = next(MBBI); 354 if (Mode == ARM_AM::ia && 355 isMatchingIncrement(NextMBBI, Base, Bytes)) { 356 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); 357 MBB.erase(NextMBBI); 358 } 359 return true; 360 } 361 } 362 363 return false; 364} 365 366static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 367 switch (Opc) { 368 case ARM::LDR: return ARM::LDR_PRE; 369 case ARM::STR: return ARM::STR_PRE; 370 case ARM::FLDS: return ARM::FLDMS; 371 case ARM::FLDD: return ARM::FLDMD; 372 case ARM::FSTS: return ARM::FSTMS; 373 case ARM::FSTD: return ARM::FSTMD; 374 default: abort(); 375 } 376 return 0; 377} 378 379static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 380 switch (Opc) { 381 case ARM::LDR: return ARM::LDR_POST; 382 case ARM::STR: return ARM::STR_POST; 383 case ARM::FLDS: return ARM::FLDMS; 384 case ARM::FLDD: return ARM::FLDMD; 385 case ARM::FSTS: return ARM::FSTMS; 386 case ARM::FSTD: return ARM::FSTMD; 387 default: abort(); 388 } 389 return 0; 390} 391 392/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base 393/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when 
possible: 394static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, 395 MachineBasicBlock::iterator MBBI, 396 const TargetInstrInfo *TII) { 397 MachineInstr *MI = MBBI; 398 unsigned Base = MI->getOperand(1).getReg(); 399 bool BaseKill = MI->getOperand(1).isKill(); 400 unsigned Bytes = getLSMultipleTransferSize(MI); 401 int Opcode = MI->getOpcode(); 402 bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; 403 if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) || 404 (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)) 405 return false; 406 407 bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; 408 // Can't do the merge if the destination register is the same as the would-be 409 // writeback register. 410 if (isLd && MI->getOperand(0).getReg() == Base) 411 return false; 412 413 bool DoMerge = false; 414 ARM_AM::AddrOpc AddSub = ARM_AM::add; 415 unsigned NewOpc = 0; 416 if (MBBI != MBB.begin()) { 417 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 418 if (isMatchingDecrement(PrevMBBI, Base, Bytes)) { 419 DoMerge = true; 420 AddSub = ARM_AM::sub; 421 NewOpc = getPreIndexedLoadStoreOpcode(Opcode); 422 } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes)) { 423 DoMerge = true; 424 NewOpc = getPreIndexedLoadStoreOpcode(Opcode); 425 } 426 if (DoMerge) 427 MBB.erase(PrevMBBI); 428 } 429 430 if (!DoMerge && MBBI != MBB.end()) { 431 MachineBasicBlock::iterator NextMBBI = next(MBBI); 432 if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes)) { 433 DoMerge = true; 434 AddSub = ARM_AM::sub; 435 NewOpc = getPostIndexedLoadStoreOpcode(Opcode); 436 } else if (isMatchingIncrement(NextMBBI, Base, Bytes)) { 437 DoMerge = true; 438 NewOpc = getPostIndexedLoadStoreOpcode(Opcode); 439 } 440 if (DoMerge) 441 MBB.erase(NextMBBI); 442 } 443 444 if (!DoMerge) 445 return false; 446 447 bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; 448 unsigned Offset = isAM2 ? 
ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) 449 : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, 450 true, isDPR ? 2 : 1); 451 if (isLd) { 452 if (isAM2) 453 // LDR_PRE, LDR_POST; 454 BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg()) 455 .addReg(Base, true) 456 .addReg(Base).addReg(0).addImm(Offset); 457 else 458 BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base, false, false, BaseKill) 459 .addImm(Offset).addReg(MI->getOperand(0).getReg(), true); 460 } else { 461 MachineOperand &MO = MI->getOperand(0); 462 if (isAM2) 463 // STR_PRE, STR_POST; 464 BuildMI(MBB, MBBI, TII->get(NewOpc), Base) 465 .addReg(MO.getReg(), false, false, MO.isKill()) 466 .addReg(Base).addReg(0).addImm(Offset); 467 else 468 BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base) 469 .addImm(Offset).addReg(MO.getReg(), false, false, MO.isKill()); 470 } 471 MBB.erase(MBBI); 472 473 return true; 474} 475 476/// isMemoryOp - Returns true if instruction is a memory operations (that this 477/// pass is capable of operating on). 478static bool isMemoryOp(MachineInstr *MI) { 479 int Opcode = MI->getOpcode(); 480 switch (Opcode) { 481 default: break; 482 case ARM::LDR: 483 case ARM::STR: 484 return MI->getOperand(1).isRegister() && MI->getOperand(2).getReg() == 0; 485 case ARM::FLDS: 486 case ARM::FSTS: 487 return MI->getOperand(1).isRegister(); 488 case ARM::FLDD: 489 case ARM::FSTD: 490 return MI->getOperand(1).isRegister(); 491 } 492 return false; 493} 494 495/// AdvanceRS - Advance register scavenger to just before the earliest memory 496/// op that is being merged. 
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position, i.e. the memory op that
  // appears earliest in the basic block.
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // Step the scavenger to the instruction just before the earliest op so
  // FindUnusedReg sees the register state at the start of the chain.
  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;          // Current chain, kept sorted by Offset.
  unsigned CurrBase = 0;      // Base register of the current chain (0 = none).
  int CurrOpc = -1;           // Opcode shared by all ops in the chain.
  unsigned CurrSize = 0;      // Per-op transfer size in bytes.
  unsigned Position = 0;      // Ordinal of MBBI within the block.

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;     // Advance MBBI at the end of this iteration?
    bool TryMerge = false;    // Flush (merge) the current chain now?
    bool Clobber = false;     // Does this op overwrite its own base register?

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned OffIdx = MBBI->getNumOperands()-1;
      unsigned OffField = MBBI->getOperand(OffIdx).getImm();
      // Decode the signed byte offset; AM5 stores it in words.
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base) {
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            // Insert in offset order so the queue stays sorted.
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      // Chain broken by a non-mergeable instruction: flush it.
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, Scratch, MemOps);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i]))
            NumMerges++;
        NumMerges += MBBII.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII))
              NumMerges++;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      }

      // Reset chain state for the next run.
      CurrBase = 0;
      CurrOpc = -1;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      // The last register operand of the LDM is the last one loaded; if it
      // is LR, load PC there instead and drop the return instruction.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}

/// runOnMachineFunction - Entry point: run both optimizations over every
/// basic block of the function. Returns true if anything changed.
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  MRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}