ARMLoadStoreOptimizer.cpp revision b43a20e3bce6b4b16151ec25ef3541494fce8425
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMLoadStoreOpt - Post-register allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
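///
/// An illustrative example of the rewrite (AM4 mode: ascending register
/// numbers, consecutive word offsets):
///   ldr r0, [r1]
///   ldr r2, [r1, #4]
/// =>
///   ldmia r1, {r0, r2}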

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    struct MemOpQueueEntry {
      int Offset;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    NumLDMGened++;
    return ARM::t2LDM;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    NumSTMGened++;
    return ARM::t2STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDR || isT2i32Load(Opc);
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STR || isT2i32Store(Opc);
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  if (isAM4 && Offset == 4) {
    if (isThumb2)
      // Thumb2 does not support ldmib / stmib.
      return false;
    Mode = ARM_AM::ib;
  } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
    if (isThumb2)
      // Thumb2 does not support ldmda / stmda.
      return false;
    Mode = ARM_AM::da;
  } else if (isAM4 && Offset == -4 * (int)NumRegs) {
    Mode = ARM_AM::db;
  } else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination registers
      // as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register as the new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2
      ? ARM::ADDri
      : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
    if (Offset < 0) {
      BaseOpc = !isThumb2
        ? ARM::SUBri
        : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
      Offset = - Offset;
    }
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false; // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  MIB.addReg(0); // Add optional writeback (0 for now).
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
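///
/// For example (illustrative; the destination register numbers must ascend
/// with the offsets):
///   ldr r4, [r0]
///   ldr r5, [r0, #4]
///   ldr r6, [r0, #8]
/// =>
///   ldmia r0, {r4, r5, r6}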
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;
  unsigned Pos = MemOps[SIndex].Position;
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try to merge the earlier ones first.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}

static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2SUBri &&
      MI->getOpcode() != ARM::t2SUBrSPi &&
      MI->getOpcode() != ARM::t2SUBrSPi12 &&
      MI->getOpcode() != ARM::tSUBspi &&
      MI->getOpcode() != ARM::SUBri)
    return false;

  // Make sure the offset fits in 8 bits.
  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
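  // The candidate must be 'sub Base, Base, #Bytes' under the same predicate;
  // the caller then folds it into the load / store as base writeback.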
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                       unsigned Bytes, unsigned Limit,
                                       ARMCC::CondCodes Pred, unsigned PredReg){
  unsigned MyPredReg = 0;
  if (!MI)
    return false;
  if (MI->getOpcode() != ARM::t2ADDri &&
      MI->getOpcode() != ARM::t2ADDrSPi &&
      MI->getOpcode() != ARM::t2ADDrSPi12 &&
      MI->getOpcode() != ARM::tADDspi &&
      MI->getOpcode() != ARM::ADDri)
    return false;

  if (Bytes <= 0 || (Limit && Bytes >= Limit))
    // Make sure the offset fits in 8 bits.
    return false;

  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
  return (MI->getOperand(0).getReg() == Base &&
          MI->getOperand(1).getReg() == Base &&
          (MI->getOperand(2).getImm()*Scale) == Bytes &&
          llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
          MyPredReg == PredReg);
}

static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDR:
  case ARM::STR:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::FLDS:
  case ARM::FSTS:
    return 4;
  case ARM::FLDD:
  case ARM::FSTD:
    return 8;
  case ARM::LDM:
  case ARM::STM:
  case ARM::t2LDM:
  case ARM::t2STM:
    return (MI->getNumOperands() - 5) * 4;
  case ARM::FLDMS:
  case ARM::FSTMS:
  case ARM::FLDMD:
  case ARM::FSTMD:
    return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
  }
}

/// MergeBaseUpdateLSMultiple - Fold preceding/trailing inc/dec of base
/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                bool &Advance,
                                                MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
               Opcode == ARM::STM || Opcode == ARM::t2STM;

  if (isAM4) {
    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
      return false;

    // Can't use the updating AM4 sub-mode if the base register is also a dest
    // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
      if (MI->getOperand(i).getReg() == Base)
        return false;
    }

    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
        MI->getOperand(4).setReg(Base);
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      } else if (Mode == ARM_AM::ib &&
                 isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                 isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  } else {
    // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
      return false;

    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
    if (MBBI != MBB.begin()) {
      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        MBB.erase(PrevMBBI);
        return true;
      }
    }

    if (MBBI != MBB.end()) {
      MachineBasicBlock::iterator NextMBBI = next(MBBI);
      if (Mode == ARM_AM::ia &&
          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
        MI->getOperand(4).setReg(Base); // WB to base
        MI->getOperand(4).setIsDef();
        if (NextMBBI == I) {
          Advance = true;
          ++I;
        }
        MBB.erase(NextMBBI);
        return true;
      }
    }
  }

  return false;
}

static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_PRE;
  case ARM::STR: return ARM::STR_PRE;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
  switch (Opc) {
  case ARM::LDR: return ARM::LDR_POST;
  case ARM::STR: return ARM::STR_POST;
  case ARM::FLDS: return ARM::FLDMS;
  case ARM::FLDD: return ARM::FLDMD;
  case ARM::FSTS: return ARM::FSTMS;
  case ARM::FSTD: return ARM::FSTMD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
  return 0;
}

/// MergeBaseUpdateLoadStore - Fold preceding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
               Opcode == ARM::FSTD || Opcode == ARM::FSTS;
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
    return false;
  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;
  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
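  // e.g. (illustrative) merging 'ldr r0, [r0]' with a base update would form
  // 'ldr r0, [r0], #4', where the loaded value and the written-back base
  // collide.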
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // AM2 - 12 bits, thumb2 - 8 bits.
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = 0;
  if (isAM5)
    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
                               ? ARM_AM::db
                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
  else if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
  if (isLd) {
    if (isAM5)
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
    else if (isAM2)
      // LDR_PRE, LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM5)
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(Base, getDefRegState(true)) // WB base register
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
    else if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operation (that this
/// pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
    return OffField;

  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          unsigned OffReg, bool OffKill, bool OffUndef,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII, bool isT2) {
  int Offset = OffImm;
  if (!isT2) {
    if (OffImm < 0)
      Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
    else
      Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  }
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    if (!isT2)
      MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}

bool
ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg();
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDM : ARM::LDM)
        : (isT2 ? ARM::t2STM : ARM::STM);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(0)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      assert((!isT2 || !OffReg) &&
             "Thumb2 ldrd / strd does not encode offset register!");
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // ops that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();
  isThumb2 = AFI->isThumb2Function();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
/// loads / stores from consecutive locations closer together to make it more
/// likely they will be combined later.

namespace {
  struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}

    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          unsigned &OffReg, int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  TD = Fn.getTarget().getTargetData();
  TII = Fn.getTarget().getInstrInfo();
  TRI = Fn.getTarget().getRegisterInfo();
  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI)
    Modified |= RescheduleLoadStoreInstrs(MFI);

  return Modified;
}

static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias information
  // some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (MemOps.count(&*I))
      continue;
    const TargetInstrDesc &TID = I->getDesc();
    if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
      return false;
    if (isLd && TID.mayStore())
      return false;
    if (!isLd) {
      if (TID.mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (TID.mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // Ok if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}

bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl,
                                          unsigned &NewOpc, unsigned &EvenReg,
                                          unsigned &OddReg, unsigned &BaseReg,
                                          unsigned &OffReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: FLDS / FSTS -> FLDD / FSTD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDR)
    NewOpc = ARM::LDRD;
  else if (Opcode == ARM::STR)
    NewOpc = ARM::STRD;
  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else
    return false;

  // Make sure the offset registers match.
  if (!isT2 &&
      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
    return false;

  // Make sure the base address satisfies the i64 ld / st alignment requirement.
  if (!Op0->hasOneMemOperand() ||
      !(*Op0->memoperands_begin())->getValue() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(Op0);
  if (isT2) {
    if (OffImm < 0) {
      if (OffImm < -255)
        // Can't fall back to t2LDRi8 / t2STRi8.
        return false;
    } else {
      int Limit = (1 << 8) * Scale;
      if (OffImm >= Limit || (OffImm & (Scale-1)))
        return false;
    }
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = - OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  EvenReg = Op0->getOperand(0).getReg();
  OddReg  = Op1->getOperand(0).getReg();
  if (EvenReg == OddReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  if (!isT2)
    OffReg = Op0->getOperand(2).getReg();
  Pred = llvm::getInstrPredicate(Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}

bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
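        // Illustrative target of the pairing (ARM ldrd needs an even/odd
        // register pair, hence the allocation hints set below):
        //   ldr r0, [rB]
        //   ldr r1, [rB, #4]
        // =>
        //   ldrd r0, r1, [rB]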
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg, OffReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
                                              dl, TII->get(NewOpc))
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            if (!isT2)
              MIB.addReg(OffReg);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}

bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      const TargetInstrDesc &TID = MI->getDesc();
      if (TID.isCall() || TID.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      MI2LocMap[MI] = Loc++;
      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2LdsMap[Base] = MIs;
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          SmallVector<MachineInstr*, 4> MIs;
          MIs.push_back(MI);
          Base2StsMap[Base] = MIs;
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}


/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}
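
// A minimal usage sketch (an assumption, not part of this file): the ARM
// target machine is expected to schedule both variants around register
// allocation, roughly along these lines:
//
//   PM.add(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true)); // pre-RA
//   PM.add(createARMLoadStoreOptimizationPass());                  // post-RA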