ARMLoadStoreOptimizer.cpp revision 6232760ee0d566bcf09b2f20bae65c1d6e73946c
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");

namespace {
  /// ARMLoadStoreOpt - Post-register-allocation pass that merges sequences of
  /// LDR / STR / FLDS / FSTS / FLDD / FSTD instructions sharing a base
  /// register into LDM / STM / FLDM / FSTM multiple-transfer instructions,
  /// and folds base register inc / dec into auto-update addressing modes.
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass((intptr_t)&ID) {}

    const TargetInstrInfo *TII;  // Cached in runOnMachineFunction.
    const MRegisterInfo *MRI;    // Cached in runOnMachineFunction.
    ARMFunctionInfo *AFI;        // Per-function info (spilled CS registers).
    RegScavenger *RS;            // Used to find a scratch base register.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store in the current chain.
    struct MemOpQueueEntry {
      int Offset;                        // Immediate offset from the base reg.
      unsigned Position;                 // Ordinal position within the block.
      MachineBasicBlock::iterator MBBI;  // The load / store instruction.
      bool Merged;                       // Set once folded into an LDM / STM.
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {};
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    SmallVector<MachineBasicBlock::iterator, 4>
    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                 int Opcode, unsigned Size,
                 ARMCC::CondCodes Pred, unsigned PredReg,
                 unsigned Scratch, MemOpQueue &MemOps);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
  return new ARMLoadStoreOpt();
}

/// getLoadStoreMultipleOpcode - Map a single-transfer opcode to its
/// multiple-transfer counterpart.  NOTE: this also bumps the corresponding
/// "generated" statistic, so it must only be called when the multiple-transfer
/// instruction is actually about to be emitted.
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: abort();
  }
  return 0;  // Not reached; keeps compilers that don't know abort() quiet.
}

/// mergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     int Offset, unsigned Base, bool BaseKill, int Opcode,
                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
                     const TargetInstrInfo *TII) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the AM4 sub-mode that makes the run of word offsets start exactly at
  // Offset:  ia -> 0..  ib -> 4..  da -> ends at 0  db -> ends at -4.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = - Offset;
    }
    // The adjustment must be encodable as an ARM shifter-operand immediate.
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, TII->get(BaseOpc), NewBase)
      .addReg(Base, false, false, BaseKill).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  // AM4 (LDM / STM) takes a sub-mode immediate; AM5 (FLDM / FSTM) encodes the
  // sub-mode together with the transfer count in 32-bit words (doubles count
  // twice).
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, TII->get(Opcode)).addReg(Base, false, false, BaseKill)
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  // Append the transferred registers, preserving each one's kill flag.
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, isDef, false, Regs[i].second);

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.  Processes MemOps[SIndex..] and returns
/// the iterators of the LDM / STM instructions that were created.
SmallVector<MachineBasicBlock::iterator, 4>
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps) {
  SmallVector<MachineBasicBlock::iterator, 4> Merges;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;  // Starting offset of the run being accumulated.
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // Note ++Loc: the LDM / STM is inserted after the latest candidate seen
      // so far; prior(Loc) then points back at the new instruction.
      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, Regs, TII)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      // Recurse to merge whatever remains of the queue from this entry on.
      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
      Merges.append(Merges2.begin(), Merges2.end());
      return Merges;
    }

    // Track the latest (by program position) instruction in the run; the
    // merged instruction is inserted after it.
    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // The base register is killed by the merged instruction iff its last use
  // (with kill flag) is at the insertion point.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, Regs, TII)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return Merges;
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
250static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { 251 int PIdx = MI->findFirstPredOperandIdx(); 252 if (PIdx == -1) { 253 PredReg = 0; 254 return ARMCC::AL; 255 } 256 257 PredReg = MI->getOperand(PIdx+1).getReg(); 258 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 259} 260 261static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 262 unsigned Bytes, ARMCC::CondCodes Pred, 263 unsigned PredReg) { 264 unsigned MyPredReg = 0; 265 return (MI && MI->getOpcode() == ARM::SUBri && 266 MI->getOperand(0).getReg() == Base && 267 MI->getOperand(1).getReg() == Base && 268 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && 269 getInstrPredicate(MI, MyPredReg) == Pred && 270 MyPredReg == PredReg); 271} 272 273static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 274 unsigned Bytes, ARMCC::CondCodes Pred, 275 unsigned PredReg) { 276 unsigned MyPredReg = 0; 277 return (MI && MI->getOpcode() == ARM::ADDri && 278 MI->getOperand(0).getReg() == Base && 279 MI->getOperand(1).getReg() == Base && 280 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && 281 getInstrPredicate(MI, MyPredReg) == Pred && 282 MyPredReg == PredReg); 283} 284 285static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 286 switch (MI->getOpcode()) { 287 default: return 0; 288 case ARM::LDR: 289 case ARM::STR: 290 case ARM::FLDS: 291 case ARM::FSTS: 292 return 4; 293 case ARM::FLDD: 294 case ARM::FSTD: 295 return 8; 296 case ARM::LDM: 297 case ARM::STM: 298 return (MI->getNumOperands() - 4) * 4; 299 case ARM::FLDMS: 300 case ARM::FSTMS: 301 case ARM::FLDMD: 302 case ARM::FSTMD: 303 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; 304 } 305} 306 307/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 308/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: 309/// 310/// stmia rn, <ra, rb, rc> 311/// rn := rn + 4 * 3; 312/// => 313/// stmia rn!, <ra, rb, rc> 314/// 
315/// rn := rn - 4 * 3; 316/// ldmia rn, <ra, rb, rc> 317/// => 318/// ldmdb rn!, <ra, rb, rc> 319static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 320 MachineBasicBlock::iterator MBBI, 321 bool &Advance, 322 MachineBasicBlock::iterator &I) { 323 MachineInstr *MI = MBBI; 324 unsigned Base = MI->getOperand(0).getReg(); 325 unsigned Bytes = getLSMultipleTransferSize(MI); 326 unsigned PredReg = 0; 327 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 328 int Opcode = MI->getOpcode(); 329 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; 330 331 if (isAM4) { 332 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) 333 return false; 334 335 // Can't use the updating AM4 sub-mode if the base register is also a dest 336 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 337 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { 338 if (MI->getOperand(i).getReg() == Base) 339 return false; 340 } 341 342 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 343 if (MBBI != MBB.begin()) { 344 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 345 if (Mode == ARM_AM::ia && 346 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 347 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); 348 MBB.erase(PrevMBBI); 349 return true; 350 } else if (Mode == ARM_AM::ib && 351 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 352 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); 353 MBB.erase(PrevMBBI); 354 return true; 355 } 356 } 357 358 if (MBBI != MBB.end()) { 359 MachineBasicBlock::iterator NextMBBI = next(MBBI); 360 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 361 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 362 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 363 if (NextMBBI == I) { 364 Advance = true; 365 ++I; 366 } 367 MBB.erase(NextMBBI); 368 return true; 369 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 370 
isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 371 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 372 if (NextMBBI == I) { 373 Advance = true; 374 ++I; 375 } 376 MBB.erase(NextMBBI); 377 return true; 378 } 379 } 380 } else { 381 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. 382 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) 383 return false; 384 385 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 386 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 387 if (MBBI != MBB.begin()) { 388 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 389 if (Mode == ARM_AM::ia && 390 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 391 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); 392 MBB.erase(PrevMBBI); 393 return true; 394 } 395 } 396 397 if (MBBI != MBB.end()) { 398 MachineBasicBlock::iterator NextMBBI = next(MBBI); 399 if (Mode == ARM_AM::ia && 400 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 401 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); 402 if (NextMBBI == I) { 403 Advance = true; 404 ++I; 405 } 406 MBB.erase(NextMBBI); 407 } 408 return true; 409 } 410 } 411 412 return false; 413} 414 415static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 416 switch (Opc) { 417 case ARM::LDR: return ARM::LDR_PRE; 418 case ARM::STR: return ARM::STR_PRE; 419 case ARM::FLDS: return ARM::FLDMS; 420 case ARM::FLDD: return ARM::FLDMD; 421 case ARM::FSTS: return ARM::FSTMS; 422 case ARM::FSTD: return ARM::FSTMD; 423 default: abort(); 424 } 425 return 0; 426} 427 428static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 429 switch (Opc) { 430 case ARM::LDR: return ARM::LDR_POST; 431 case ARM::STR: return ARM::STR_POST; 432 case ARM::FLDS: return ARM::FLDMS; 433 case ARM::FLDD: return ARM::FLDMD; 434 case ARM::FSTS: return ARM::FSTMS; 435 case ARM::FSTD: return ARM::FSTMD; 436 default: abort(); 437 } 438 return 0; 439} 440 
/// mergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible, rewriting
/// the instruction to its pre-/post-indexed (or write-back FLDM/FSTM) form.
/// Advance / I let the caller know when the instruction it holds in I was
/// the one erased.
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  // Only handle the zero-offset form; a nonzero immediate can't absorb the
  // base update.
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // A matching inc / dec BEFORE the mem op folds into the pre-indexed form.
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // A matching inc / dec AFTER the mem op folds into the post-indexed form.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  // AM2 encodes the byte offset directly; AM5 expresses the update as a
  // one-register (two words for doubles) write-back transfer.
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST;
      BuildMI(MBB, MBBI, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, true)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg)
      ;
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base, false, false, BaseKill)
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), true);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST;
      BuildMI(MBB, MBBI, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), false, false, MO.isKill())
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), false, false, MO.isKill());
  }
  // Replace the original single load / store with the new instruction built
  // just before it.
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
541static bool isMemoryOp(MachineInstr *MI) { 542 int Opcode = MI->getOpcode(); 543 switch (Opcode) { 544 default: break; 545 case ARM::LDR: 546 case ARM::STR: 547 return MI->getOperand(1).isRegister() && MI->getOperand(2).getReg() == 0; 548 case ARM::FLDS: 549 case ARM::FSTS: 550 return MI->getOperand(1).isRegister(); 551 case ARM::FLDD: 552 case ARM::FSTD: 553 return MI->getOperand(1).isRegister(); 554 } 555 return false; 556} 557 558/// AdvanceRS - Advance register scavenger to just before the earliest memory 559/// op that is being merged. 560void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { 561 MachineBasicBlock::iterator Loc = MemOps[0].MBBI; 562 unsigned Position = MemOps[0].Position; 563 for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { 564 if (MemOps[i].Position < Position) { 565 Position = MemOps[i].Position; 566 Loc = MemOps[i].MBBI; 567 } 568 } 569 570 if (Loc != MBB.begin()) 571 RS->forward(prior(Loc)); 572} 573 574/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR 575/// ops of the same base and incrementing offset into LDM / STM ops. 
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;      // Current chain of mergeable loads / stores.
  unsigned CurrBase = 0;  // Base register of the current chain (0 = none).
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;  // Ordinal position of MBBI within the block.

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      // The offset operand sits three slots before the end (ahead of the two
      // predicate operands).
      const TargetInstrDescriptor *TID = MBBI->getDesc();
      unsigned OffField = MBBI->getOperand(TID->numOperands-3).getImm();
      // Decode to a signed byte offset (AM5 offsets are in words).
      int Offset = isAM2
        ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
      if (isAM2) {
        if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      } else {
        if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
          Offset = -Offset;
      }
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue; keep it sorted by ascending offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        SmallVector<MachineBasicBlock::iterator,4> MBBII =
          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                       CurrPred, CurrPredReg, Scratch, MemOps);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
            NumMerges++;
        // Each LDM / STM formed counts as a merge in its own right.
        NumMerges += MBBII.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              NumMerges++;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      }

      // Reset chain-tracking state for the next chain.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
731/// ldmfd sp!, {r7, lr} 732/// bx lr 733/// => 734/// ldmfd sp!, {r7, pc} 735bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { 736 if (MBB.empty()) return false; 737 738 MachineBasicBlock::iterator MBBI = prior(MBB.end()); 739 if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) { 740 MachineInstr *PrevMI = prior(MBBI); 741 if (PrevMI->getOpcode() == ARM::LDM) { 742 MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); 743 if (MO.getReg() == ARM::LR) { 744 PrevMI->setInstrDescriptor(TII->get(ARM::LDM_RET)); 745 MO.setReg(ARM::PC); 746 MBB.erase(MBBI); 747 return true; 748 } 749 } 750 } 751 return false; 752} 753 754bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 755 const TargetMachine &TM = Fn.getTarget(); 756 AFI = Fn.getInfo<ARMFunctionInfo>(); 757 TII = TM.getInstrInfo(); 758 MRI = TM.getRegisterInfo(); 759 RS = new RegScavenger(); 760 761 bool Modified = false; 762 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 763 ++MFI) { 764 MachineBasicBlock &MBB = *MFI; 765 Modified |= LoadStoreMultipleOpti(MBB); 766 Modified |= MergeReturnIntoLDM(MBB); 767 } 768 769 delete RS; 770 return Modified; 771} 772