ARMLoadStoreOptimizer.cpp revision 9c06178e355be69e313820d31878022bc882c2db
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
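///
/// Illustrative example (not from the original source): a run of word loads
/// off a single base register at consecutive, ascending offsets, e.g.
///
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
///
/// is rewritten into one load-multiple:
///
///   ldmia r0, {r4, r5, r6}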

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: LLVM_UNREACHABLE("Unhandled opcode!");
  }
  return 0;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If the starting offset isn't zero, insert a MI to materialize a new
    // base. But only do so if it is cost effective, i.e. merging more than
    // two loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri;
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // The new base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or
/// more load / store multiple instructions.
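///
/// A hedged sketch of the strategy, inferred from the code below rather than
/// stated by the original author: the queue is scanned from SIndex, and the
/// run is extended while each entry lies exactly Size bytes past the previous
/// one and the register numbers keep the required order (ascending for AM4,
/// consecutive for AM5). At the first entry that breaks the run, the
/// registers gathered so far are handed to MergeOps and the routine recurses
/// on the remainder of the queue.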
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg, bool isThumb2) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (isThumb2) {
    if (MI->getOpcode() != ARM::t2SUBri)
      return false;
    // Make sure the offset fits in 8 bits.
    if (Bytes <= 0 || Bytes >= 0x100)
      return false;
  } else {
    if (MI->getOpcode() != ARM::SUBri)
      return false;
    // Make sure the offset fits in 12 bits.
    if (Bytes <= 0 || Bytes >= 0x1000)
      return false;
  }

  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          MI->getOperand(2).getImm() == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, ARMCC::CondCodes Pred,
                                       unsigned PredReg, bool isThumb2) {
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (isThumb2) {
    if (MI->getOpcode() != ARM::t2ADDri)
      return false;
    // Make sure the offset fits in 8 bits.
    if (Bytes <= 0 || Bytes >= 0x100)
      return false;
  } else {
    if (MI->getOpcode() != ARM::ADDri)
      return false;
    // Make sure the offset fits in 12 bits.
    if (Bytes <= 0 || Bytes >= 0x1000)
      return false;
  }

  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          MI->getOperand(2).getImm() == Bytes &&
          getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
    return (MI->getNumOperands() - 4) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
    Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg, isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg,
                                     isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg, isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg,
                                     isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg, isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg, isThumb2)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: LLVM_UNREACHABLE("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default:
    LLVM_UNREACHABLE("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (!isAM2 && !isThumb2 &&
           ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isThumb2 && MI->getOperand(2).getImm() != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg, isThumb2)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg, isThumb2)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg,
                                     isThumb2)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg,
                                   isThumb2)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2
    ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : (isThumb2
       ? Bytes
       : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                           true, isDPR ? 2 : 1));
  if (isLd) {
    if (isAM2 || isThumb2)
      // LDR_PRE, LDR_POST, t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else if (!isThumb2)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2 || isThumb2)
      // STR_PRE, STR_POST, t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return true;
  }
  return false;
}

/// AdvanceRS - Advance the register scavenger to just before the earliest
/// memory op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}

bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and the base register is killed, it may have been
      // re-defed by the load; make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg,
                      OddDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg,
                      EvenDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      //   r4 := ldr [r5]
      //   r5 := ldr [r5, #4]
      //   r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's
        // needed. First advance to the instruction just before the start of
        // the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register
        // or a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // instructions that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,
                                         Advance, MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If the iterator hasn't been advanced and this is not a memory op,
      // skip it. It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
///   ldmfd sp!, {r7, lr}
///   bx lr
/// =>
///   ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::t2BX_RET)){
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
        PrevMI->setDesc(TII->get(NewOpc));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.
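///
/// Illustrative example (not from the original source):
///
///   ldr r1, [r0]
///   add r7, r7, #1        @ unrelated instruction
///   ldr r2, [r0, #4]
///
/// The second load is moved up next to the first, so that the post-RA pass
/// above (or CanFormLdStDWord below) can combine the now-adjacent pair.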

namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, unsigned &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      //   str r1, [r0]
      //   strh r5, [r0]
      //   str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, unsigned &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred) {
  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment
  // requirement.
  if (!Op0->hasOneMemOperand() ||
      !Op0->memoperands_begin()->getValue() ||
      Op0->memoperands_begin()->isVolatile())
    return false;

  unsigned Align = Op0->memoperands_begin()->getAlignment();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 needs 8-byte align
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  if (OffImm < 0) {
    AddSub = ARM_AM::sub;
    OffImm = - OffImm;
  }
  if (OffImm >= 256) // 8 bits
    return false;
  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);

  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  OffReg  = Op0->getOperand(2).getReg();
  Pred = getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVector<MachineInstr*, 4> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
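  //
  // Illustrative example of a gap (not from the original source): accesses at
  // [Base] and [Base, #8] with nothing at [Base, #4] do not form a contiguous
  // run, so the scan below cuts the sequence at that point.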
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative: if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        unsigned NewOpc = 0;
        unsigned Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg).addReg(0).addImm(Offset)
              .addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opcode = MI->getOpcode();
      bool isLd = Opcode == ARM::LDR ||
        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load /
/// store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
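
// Usage note (a hedged sketch, not part of the original file): the ARM
// target's pass pipeline is expected to add both variants, roughly
//
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));   // pre-RA
//   // ... register allocation ...
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false));  // post-RA
//
// where PM is a PassManagerBase; the exact hook points in ARMTargetMachine
// are an assumption here, not taken from this file.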