// ARMLoadStoreOptimizer.cpp revision 974fe5d69187bdf33b0e111ff72e965431df4191
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that performs load / store related peephole
// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

/// ARMAllocLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.

namespace {
  struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}

    // Cached target hooks and per-function state, set up in
    // runOnMachineFunction before any basic block is processed.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;   // Used to find a scratch register for a new base.

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One load/store candidate in the current merge chain.
    struct MemOpQueueEntry {
      int Offset;           // Immediate offset from the common base register.
      unsigned Position;    // Ordinal position of the instruction in the MBB.
      MachineBasicBlock::iterator MBBI;
      bool Merged;          // True once this op was folded into an LDM/STM.
      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
        : Offset(o), Position(p), MBBI(i), Merged(false) {};
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

/// getLoadStoreMultipleOpcode - Map a single load/store opcode to the
/// corresponding load/store-multiple opcode. Also bumps the matching
/// statistic, so this must be called exactly once per generated LDM/STM.
static int getLoadStoreMultipleOpcode(int Opcode) {
  switch (Opcode) {
  case ARM::LDR:
    NumLDMGened++;
    return ARM::LDM;
  case ARM::STR:
    NumSTMGened++;
    return ARM::STM;
  case ARM::FLDS:
    NumFLDMGened++;
    return ARM::FLDMS;
  case ARM::FSTS:
    NumFSTMGened++;
    return ARM::FSTMS;
  case ARM::FLDD:
    NumFLDMGened++;
    return ARM::FLDMD;
  case ARM::FSTD:
    NumFSTMGened++;
    return ARM::FSTMD;
  default: abort();
  }
  return 0;
}

/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the AM4 sub-mode (ia/ib/da/db) that makes the first transfer land
  // exactly at Base+Offset without materializing a new base.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  if (isAM4 && Offset == 4)
    Mode = ARM_AM::ib;
  else if (isAM4 && Offset == -4 * (int)NumRegs + 4)
    Mode = ARM_AM::da;
  else if (isAM4 && Offset == -4 * (int)NumRegs)
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (Opcode == ARM::LDR)
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = ARM::ADDri;
    if (Offset < 0) {
      BaseOpc = ARM::SUBri;
      Offset = - Offset;
    }
    // The offset must be encodable as an ARM shifter-operand immediate.
    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right its use.
  }

  bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
  bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  Opcode = getLoadStoreMultipleOpcode(Opcode);
  // AM4 (LDM/STM) encodes only the sub-mode; AM5 (FLDM/FSTM) also encodes the
  // transfer count (doubled for double-precision registers).
  MachineInstrBuilder MIB = (isAM4)
    ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
    : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
        .addImm(Pred).addReg(PredReg);
  // Append the transfer registers, preserving each one's def/kill state.
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}

/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.
/// MergeLDR_STR - Starting at MemOps[SIndex], greedily extend a run of
/// same-base loads/stores whose offsets ascend by Size and whose register
/// numbers ascend, then hand the run to MergeOps. On a break it merges what
/// it has and recurses on the remainder. Merged LDM/STM iterators are
/// appended to Merges; merged entries are erased and flagged Merged.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                              unsigned Base, int Opcode, unsigned Size,
                              ARMCC::CondCodes Pred, unsigned PredReg,
                              unsigned Scratch, MemOpQueue &MemOps,
                              SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;              // Starting offset of the run being built.
  unsigned Pos = MemOps[SIndex].Position;
  // Loc tracks the latest (by Position) instruction of the run; the merged
  // instruction is inserted just after it.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  unsigned PReg = Loc->getOperand(0).getReg();
  unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
  bool isKill = Loc->getOperand(0).isKill();

  SmallVector<std::pair<unsigned,bool>, 8> Regs;
  Regs.push_back(std::make_pair(PReg, isKill));
  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg();
    unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
    isKill = MemOps[i].MBBI->getOperand(0).isKill();
    // AM4 - register numbers in ascending order.
    // AM5 - consecutive register numbers in ascending order.
    if (NewOffset == Offset + (int)Size &&
        ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
      Offset += Size;
      Regs.push_back(std::make_pair(Reg, isKill));
      PRegNum = RegNum;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      // Note: ++Loc makes the insertion point the instruction after the run;
      // MergeOps inserts before it, and prior(Loc) is the new LDM/STM.
      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
                   Scratch, dl, Regs)) {
        Merges.push_back(prior(Loc));
        for (unsigned j = SIndex; j < i; ++j) {
          MBB.erase(MemOps[j].MBBI);
          MemOps[j].Merged = true;
        }
      }
      // Recurse to try merging the rest of the queue from the break point.
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    if (MemOps[i].Position > Pos) {
      Pos = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  // Whole remaining queue formed one run; merge it after the last op.
  // Base is killed if the last op reads it with a kill flag.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
               Scratch, dl, Regs)) {
    Merges.push_back(prior(Loc));
    for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
      MBB.erase(MemOps[i].MBBI);
      MemOps[i].Merged = true;
    }
  }

  return;
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
270static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { 271 int PIdx = MI->findFirstPredOperandIdx(); 272 if (PIdx == -1) { 273 PredReg = 0; 274 return ARMCC::AL; 275 } 276 277 PredReg = MI->getOperand(PIdx+1).getReg(); 278 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 279} 280 281static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, 282 unsigned Bytes, ARMCC::CondCodes Pred, 283 unsigned PredReg) { 284 unsigned MyPredReg = 0; 285 return (MI && MI->getOpcode() == ARM::SUBri && 286 MI->getOperand(0).getReg() == Base && 287 MI->getOperand(1).getReg() == Base && 288 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && 289 getInstrPredicate(MI, MyPredReg) == Pred && 290 MyPredReg == PredReg); 291} 292 293static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, 294 unsigned Bytes, ARMCC::CondCodes Pred, 295 unsigned PredReg) { 296 unsigned MyPredReg = 0; 297 return (MI && MI->getOpcode() == ARM::ADDri && 298 MI->getOperand(0).getReg() == Base && 299 MI->getOperand(1).getReg() == Base && 300 ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && 301 getInstrPredicate(MI, MyPredReg) == Pred && 302 MyPredReg == PredReg); 303} 304 305static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { 306 switch (MI->getOpcode()) { 307 default: return 0; 308 case ARM::LDR: 309 case ARM::STR: 310 case ARM::FLDS: 311 case ARM::FSTS: 312 return 4; 313 case ARM::FLDD: 314 case ARM::FSTD: 315 return 8; 316 case ARM::LDM: 317 case ARM::STM: 318 return (MI->getNumOperands() - 4) * 4; 319 case ARM::FLDMS: 320 case ARM::FSTMS: 321 case ARM::FLDMD: 322 case ARM::FSTMD: 323 return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; 324 } 325} 326 327/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base 328/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: 329/// 330/// stmia rn, <ra, rb, rc> 331/// rn := rn + 4 * 3; 332/// => 333/// stmia rn!, <ra, rb, rc> 334/// 
335/// rn := rn - 4 * 3; 336/// ldmia rn, <ra, rb, rc> 337/// => 338/// ldmdb rn!, <ra, rb, rc> 339static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, 340 MachineBasicBlock::iterator MBBI, 341 bool &Advance, 342 MachineBasicBlock::iterator &I) { 343 MachineInstr *MI = MBBI; 344 unsigned Base = MI->getOperand(0).getReg(); 345 unsigned Bytes = getLSMultipleTransferSize(MI); 346 unsigned PredReg = 0; 347 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 348 int Opcode = MI->getOpcode(); 349 bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; 350 351 if (isAM4) { 352 if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) 353 return false; 354 355 // Can't use the updating AM4 sub-mode if the base register is also a dest 356 // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 357 for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { 358 if (MI->getOperand(i).getReg() == Base) 359 return false; 360 } 361 362 ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); 363 if (MBBI != MBB.begin()) { 364 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 365 if (Mode == ARM_AM::ia && 366 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 367 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); 368 MBB.erase(PrevMBBI); 369 return true; 370 } else if (Mode == ARM_AM::ib && 371 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 372 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); 373 MBB.erase(PrevMBBI); 374 return true; 375 } 376 } 377 378 if (MBBI != MBB.end()) { 379 MachineBasicBlock::iterator NextMBBI = next(MBBI); 380 if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && 381 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 382 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 383 if (NextMBBI == I) { 384 Advance = true; 385 ++I; 386 } 387 MBB.erase(NextMBBI); 388 return true; 389 } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && 390 
isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 391 MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); 392 if (NextMBBI == I) { 393 Advance = true; 394 ++I; 395 } 396 MBB.erase(NextMBBI); 397 return true; 398 } 399 } 400 } else { 401 // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. 402 if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) 403 return false; 404 405 ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); 406 unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); 407 if (MBBI != MBB.begin()) { 408 MachineBasicBlock::iterator PrevMBBI = prior(MBBI); 409 if (Mode == ARM_AM::ia && 410 isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { 411 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); 412 MBB.erase(PrevMBBI); 413 return true; 414 } 415 } 416 417 if (MBBI != MBB.end()) { 418 MachineBasicBlock::iterator NextMBBI = next(MBBI); 419 if (Mode == ARM_AM::ia && 420 isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { 421 MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); 422 if (NextMBBI == I) { 423 Advance = true; 424 ++I; 425 } 426 MBB.erase(NextMBBI); 427 } 428 return true; 429 } 430 } 431 432 return false; 433} 434 435static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { 436 switch (Opc) { 437 case ARM::LDR: return ARM::LDR_PRE; 438 case ARM::STR: return ARM::STR_PRE; 439 case ARM::FLDS: return ARM::FLDMS; 440 case ARM::FLDD: return ARM::FLDMD; 441 case ARM::FSTS: return ARM::FSTMS; 442 case ARM::FSTD: return ARM::FSTMD; 443 default: abort(); 444 } 445 return 0; 446} 447 448static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { 449 switch (Opc) { 450 case ARM::LDR: return ARM::LDR_POST; 451 case ARM::STR: return ARM::STR_POST; 452 case ARM::FLDS: return ARM::FLDMS; 453 case ARM::FLDD: return ARM::FLDMD; 454 case ARM::FSTS: return ARM::FSTMS; 455 case ARM::FSTD: return ARM::FSTMD; 456 default: abort(); 457 } 458 return 0; 459} 460 
/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// produces a pre/post-indexed LDR/STR, or a single-register writeback
/// FLDM/FSTM for the FP forms. Returns true if a merge happened; the merged
/// add/sub and the original memory op are both erased.
static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const TargetInstrInfo *TII,
                                     bool &Advance,
                                     MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  // Only handle the zero-offset form; a nonzero immediate can't be folded
  // into a writeback of exactly +/-Bytes.
  if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) ||
      (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0))
    return false;

  bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // Preceding add/sub of Bytes -> pre-indexed form.
  // (The pre-increment case is AM2-only; FP ops only fold ia/db.)
  if (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes,
                                            Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Otherwise, a trailing add/sub of Bytes -> post-indexed form.
  if (!DoMerge && MBBI != MBB.end()) {
    MachineBasicBlock::iterator NextMBBI = next(MBBI);
    if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) {
      DoMerge = true;
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
    }
    if (DoMerge) {
      // Keep the caller's scan iterator valid if it pointed at the add/sub.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
  unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift)
    : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia,
                        true, isDPR ? 2 : 1);
  if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FLDMS, FLDMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
        .addReg(Base, getKillRegState(BaseKill))
        .addImm(Offset).addImm(Pred).addReg(PredReg)
        .addReg(MI->getOperand(0).getReg(), RegState::Define);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST;
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // FSTMS, FSTMD
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
        .addImm(Pred).addReg(PredReg)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
  }
  MBB.erase(MBBI);

  return true;
}

/// isMemoryOp - Returns true if instruction is a memory operations (that this
/// pass is capable of operating on).
static bool isMemoryOp(MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  switch (Opcode) {
  default: break;
  case ARM::LDR:
  case ARM::STR:
    // Only register-base, no-index (offset register 0) forms are candidates.
    return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
  case ARM::FLDS:
  case ARM::FSTS:
    return MI->getOperand(1).isReg();
  case ARM::FLDD:
  case ARM::FSTD:
    return MI->getOperand(1).isReg();
  }
  return false;
}

/// AdvanceRS - Advance register scavenger to just before the earliest memory
/// op that is being merged.
void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
  // Find the queue entry with the smallest Position (earliest in the block).
  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
  unsigned Position = MemOps[0].Position;
  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    if (MemOps[i].Position < Position) {
      Position = MemOps[i].Position;
      Loc = MemOps[i].MBBI;
    }
  }

  if (Loc != MBB.begin())
    RS->forward(prior(Loc));
}

/// getMemoryOpOffset - Decode the signed immediate offset of a load/store,
/// handling the AM2 (LDR/STR), AM3 (LDRD/STRD) and AM5 (FP, scaled by 4)
/// encodings. The offset field is always the third-from-last operand.
static int getMemoryOpOffset(const MachineInstr *MI) {
  int Opcode = MI->getOpcode();
  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
  int Offset = isAM2
    ? ARM_AM::getAM2Offset(OffField)
    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
             : ARM_AM::getAM5Offset(OffField) * 4);
  if (isAM2) {
    if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else if (isAM3) {
    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  } else {
    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
      Offset = -Offset;
  }
  return Offset;
}

/// InsertLDR_STR - Emit a single AM2 LDR or STR before MBBI with the given
/// register/base/offset-register operands and dead/kill flags. Used when an
/// invalid LDRD/STRD is split into two word-sized ops.
/// NOTE(review): the dl parameter is unused; the builder takes the debug loc
/// from MBBI instead.
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          int OffImm, bool isDef,
                          DebugLoc dl, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill,
                          unsigned BaseReg, bool BaseKill,
                          unsigned OffReg, bool OffKill,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          const TargetInstrInfo *TII) {
  unsigned Offset;
  if (OffImm < 0)
    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
  else
    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
  if (isDef)
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
  else
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill))
      .addReg(OffReg, getKillRegState(OffKill))
      .addImm(Offset)
      .addImm(Pred).addReg(PredReg);
}

/// FixInvalidRegPairOp - LDRD/STRD require an even/odd consecutive register
/// pair. If MBBI is an LDRD/STRD whose registers don't satisfy that, rewrite
/// it as an LDM/STM (ascending regs, no offset) or as two LDR/STR ops, and
/// erase the original. MBBI is left pointing before the replacement so the
/// caller's loop revisits the new instructions.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg = MI->getOperand(1).getReg();
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
    // A legal pair (even reg, followed by odd reg) needs no fixing.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    bool isLd = Opcode == ARM::LDRD;
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool OddDeadKill = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    const MachineOperand &OffOp = MI->getOperand(3);
    unsigned OffReg = OffOp.getReg();
    bool OffKill = OffOp.isKill();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getKillRegState(EvenDeadKill))
          .addReg(OddReg, getKillRegState(OddDeadKill));
        ++NumSTRD2STM;
      }
    } else {
      // Split into two instructions.
      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg) ||
           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
        // Emit the odd half first so the base/offset regs stay live for it.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
      } else {
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
                      Pred, PredReg, TII);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
                      Pred, PredReg, TII);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    MBBI = prior(MBBI);
    MBB.erase(MI);
  }
  return false;
}

/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;     // Current chain of mergeable same-base loads/stores.
  unsigned CurrBase = 0; // 0 means no chain is open.
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  unsigned Position = 0; // Ordinal of the current instruction in the block.
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Rewrite illegal LDRD/STRD first; it repositions MBBI itself.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance = false;
    bool TryMerge = false;
    bool Clobber = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
        NumMemOps++;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue, keeping it sorted by Offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
            NumMemOps++;
            Advance = true;
          } else {
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
                NumMemOps++;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    if (Advance) {
      ++Position;
      ++MBBI;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register. Make sure it's a call clobbered register or
        // a spilled callee-saved register.
        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true);
        if (!Scratch)
          Scratch = RS->FindUnusedReg(&ARM::GPRRegClass,
                                      AFI->getSpilledCSRegisters());
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceeding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceeding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceeding/trailing base inc/dec into the single
        // load/store.
        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset chain state for the next run.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}

namespace {
  /// OffsetCompare - Orders memory ops by decreasing immediate offset.
  /// Asserts that distinct instructions never share an offset.
  struct OffsetCompare {
    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
      int LOffset = getMemoryOpOffset(LHS);
      int ROffset = getMemoryOpOffset(RHS);
      assert(LHS == RHS || LOffset != ROffset);
      return LOffset > ROffset;
    }
  };
}

/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
/// (bx lr) into the preceeding stack restore so it directly restore the value
/// of LR into pc.
/// ldmfd sp!, {r7, lr}
/// bx lr
/// =>
/// ldmfd sp!, {r7, pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
    MachineInstr *PrevMI = prior(MBBI);
    if (PrevMI->getOpcode() == ARM::LDM) {
      // LR must be the last register in the LDM's transfer list.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() == ARM::LR) {
        // Retarget the LDM to load PC instead of LR and drop the bx.
        PrevMI->setDesc(TII->get(ARM::LDM_RET));
        MO.setReg(ARM::PC);
        MBB.erase(MBBI);
        return true;
      }
    }
  }
  return false;
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  // Cache target hooks and set up a fresh scavenger for this function.
  const TargetMachine &TM = Fn.getTarget();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = TM.getInstrInfo();
  TRI = TM.getRegisterInfo();
  RS = new RegScavenger();

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    Modified |= MergeReturnIntoLDM(MBB);
  }

  delete RS;
  return Modified;
}


/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
/// load / stores from consecutive locations close to make it more
/// likely they will be combined later.
947 948namespace { 949 struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ 950 static char ID; 951 ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} 952 953 const TargetData *TD; 954 const TargetInstrInfo *TII; 955 const TargetRegisterInfo *TRI; 956 const ARMSubtarget *STI; 957 MachineRegisterInfo *MRI; 958 959 virtual bool runOnMachineFunction(MachineFunction &Fn); 960 961 virtual const char *getPassName() const { 962 return "ARM pre- register allocation load / store optimization pass"; 963 } 964 965 private: 966 bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, 967 unsigned &NewOpc, unsigned &EvenReg, 968 unsigned &OddReg, unsigned &BaseReg, 969 unsigned &OffReg, unsigned &Offset, 970 unsigned &PredReg, ARMCC::CondCodes &Pred); 971 bool RescheduleOps(MachineBasicBlock *MBB, 972 SmallVector<MachineInstr*, 4> &Ops, 973 unsigned Base, bool isLd, 974 DenseMap<MachineInstr*, unsigned> &MI2LocMap); 975 bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); 976 }; 977 char ARMPreAllocLoadStoreOpt::ID = 0; 978} 979 980bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { 981 TD = Fn.getTarget().getTargetData(); 982 TII = Fn.getTarget().getInstrInfo(); 983 TRI = Fn.getTarget().getRegisterInfo(); 984 STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); 985 MRI = &Fn.getRegInfo(); 986 987 bool Modified = false; 988 for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 989 ++MFI) 990 Modified |= RescheduleLoadStoreInstrs(MFI); 991 992 return Modified; 993} 994 995static bool IsSafeToMove(bool isLd, unsigned Base, 996 MachineBasicBlock::iterator I, 997 MachineBasicBlock::iterator E, 998 SmallPtrSet<MachineInstr*, 4> MoveOps, 999 const TargetRegisterInfo *TRI) { 1000 // Are there stores / loads / calls between them? 1001 // FIXME: This is overly conservative. We should make use of alias information 1002 // some day. 
1003 while (++I != E) { 1004 const TargetInstrDesc &TID = I->getDesc(); 1005 if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) 1006 return false; 1007 if (isLd && TID.mayStore()) 1008 return false; 1009 if (!isLd) { 1010 if (TID.mayLoad()) 1011 return false; 1012 // It's not safe to move the first 'str' down. 1013 // str r1, [r0] 1014 // strh r5, [r0] 1015 // str r4, [r0, #+4] 1016 if (TID.mayStore() && !MoveOps.count(&*I)) 1017 return false; 1018 } 1019 for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { 1020 MachineOperand &MO = I->getOperand(j); 1021 if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) 1022 return false; 1023 } 1024 } 1025 return true; 1026} 1027 1028bool 1029ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, 1030 DebugLoc &dl, 1031 unsigned &NewOpc, unsigned &EvenReg, 1032 unsigned &OddReg, unsigned &BaseReg, 1033 unsigned &OffReg, unsigned &Offset, 1034 unsigned &PredReg, 1035 ARMCC::CondCodes &Pred) { 1036 // FIXME: FLDS / FSTS -> FLDD / FSTD 1037 unsigned Opcode = Op0->getOpcode(); 1038 if (Opcode == ARM::LDR) 1039 NewOpc = ARM::LDRD; 1040 else if (Opcode == ARM::STR) 1041 NewOpc = ARM::STRD; 1042 else 1043 return 0; 1044 1045 // Must sure the base address satisfies i64 ld / st alignment requirement. 1046 if (!Op0->hasOneMemOperand() || 1047 !Op0->memoperands_begin()->getValue() || 1048 Op0->memoperands_begin()->isVolatile()) 1049 return false; 1050 1051 unsigned Align = Op0->memoperands_begin()->getAlignment(); 1052 unsigned ReqAlign = STI->hasV6Ops() 1053 ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align 1054 if (Align < ReqAlign) 1055 return false; 1056 1057 // Then make sure the immediate offset fits. 
1058 int OffImm = getMemoryOpOffset(Op0); 1059 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1060 if (OffImm < 0) { 1061 AddSub = ARM_AM::sub; 1062 OffImm = - OffImm; 1063 } 1064 if (OffImm >= 256) // 8 bits 1065 return false; 1066 Offset = ARM_AM::getAM3Opc(AddSub, OffImm); 1067 1068 EvenReg = Op0->getOperand(0).getReg(); 1069 OddReg = Op1->getOperand(0).getReg(); 1070 if (EvenReg == OddReg) 1071 return false; 1072 BaseReg = Op0->getOperand(1).getReg(); 1073 OffReg = Op0->getOperand(2).getReg(); 1074 Pred = getInstrPredicate(Op0, PredReg); 1075 dl = Op0->getDebugLoc(); 1076 return true; 1077} 1078 1079bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, 1080 SmallVector<MachineInstr*, 4> &Ops, 1081 unsigned Base, bool isLd, 1082 DenseMap<MachineInstr*, unsigned> &MI2LocMap) { 1083 bool RetVal = false; 1084 1085 // Sort by offset (in reverse order). 1086 std::sort(Ops.begin(), Ops.end(), OffsetCompare()); 1087 1088 // The loads / stores of the same base are in order. Scan them from first to 1089 // last and check for the followins: 1090 // 1. Any def of base. 1091 // 2. Any gaps. 
1092 while (Ops.size() > 1) { 1093 unsigned FirstLoc = ~0U; 1094 unsigned LastLoc = 0; 1095 MachineInstr *FirstOp = 0; 1096 MachineInstr *LastOp = 0; 1097 int LastOffset = 0; 1098 unsigned LastOpcode = 0; 1099 unsigned LastBytes = 0; 1100 unsigned NumMove = 0; 1101 for (int i = Ops.size() - 1; i >= 0; --i) { 1102 MachineInstr *Op = Ops[i]; 1103 unsigned Loc = MI2LocMap[Op]; 1104 if (Loc <= FirstLoc) { 1105 FirstLoc = Loc; 1106 FirstOp = Op; 1107 } 1108 if (Loc >= LastLoc) { 1109 LastLoc = Loc; 1110 LastOp = Op; 1111 } 1112 1113 unsigned Opcode = Op->getOpcode(); 1114 if (LastOpcode && Opcode != LastOpcode) 1115 break; 1116 1117 int Offset = getMemoryOpOffset(Op); 1118 unsigned Bytes = getLSMultipleTransferSize(Op); 1119 if (LastBytes) { 1120 if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) 1121 break; 1122 } 1123 LastOffset = Offset; 1124 LastBytes = Bytes; 1125 LastOpcode = Opcode; 1126 if (++NumMove == 4) 1127 break; 1128 } 1129 1130 if (NumMove <= 1) 1131 Ops.pop_back(); 1132 else { 1133 SmallPtrSet<MachineInstr*, 4> MoveOps; 1134 for (int i = NumMove-1; i >= 0; --i) 1135 MoveOps.insert(Ops[i]); 1136 1137 // Be conservative, if the instructions are too far apart, don't 1138 // move them. We want to limit the increase of register pressure. 1139 bool DoMove = (LastLoc - FirstLoc) < NumMove*4; 1140 if (DoMove) 1141 DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); 1142 if (!DoMove) { 1143 for (unsigned i = 0; i != NumMove; ++i) 1144 Ops.pop_back(); 1145 } else { 1146 // This is the new location for the loads / stores. 1147 MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; 1148 while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) 1149 ++InsertPos; 1150 1151 // If we are moving a pair of loads / stores, see if it makes sense 1152 // to try to allocate a pair of registers that can form register pairs. 
1153 MachineInstr *Op0 = Ops.back(); 1154 MachineInstr *Op1 = Ops[Ops.size()-2]; 1155 unsigned EvenReg = 0, OddReg = 0; 1156 unsigned BaseReg = 0, OffReg = 0, PredReg = 0; 1157 ARMCC::CondCodes Pred = ARMCC::AL; 1158 unsigned NewOpc = 0; 1159 unsigned Offset = 0; 1160 DebugLoc dl; 1161 if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, 1162 EvenReg, OddReg, BaseReg, OffReg, 1163 Offset, PredReg, Pred)) { 1164 Ops.pop_back(); 1165 Ops.pop_back(); 1166 1167 // Form the pair instruction. 1168 if (isLd) { 1169 BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) 1170 .addReg(EvenReg, RegState::Define) 1171 .addReg(OddReg, RegState::Define) 1172 .addReg(BaseReg).addReg(0).addImm(Offset) 1173 .addImm(Pred).addReg(PredReg); 1174 ++NumLDRDFormed; 1175 } else { 1176 BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc)) 1177 .addReg(EvenReg) 1178 .addReg(OddReg) 1179 .addReg(BaseReg).addReg(0).addImm(Offset) 1180 .addImm(Pred).addReg(PredReg); 1181 ++NumSTRDFormed; 1182 } 1183 MBB->erase(Op0); 1184 MBB->erase(Op1); 1185 1186 // Add register allocation hints to form register pairs. 
1187 MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); 1188 MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); 1189 } else { 1190 for (unsigned i = 0; i != NumMove; ++i) { 1191 MachineInstr *Op = Ops.back(); 1192 Ops.pop_back(); 1193 MBB->splice(InsertPos, MBB, Op); 1194 } 1195 } 1196 1197 NumLdStMoved += NumMove; 1198 RetVal = true; 1199 } 1200 } 1201 } 1202 1203 return RetVal; 1204} 1205 1206bool 1207ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { 1208 bool RetVal = false; 1209 1210 DenseMap<MachineInstr*, unsigned> MI2LocMap; 1211 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; 1212 DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; 1213 SmallVector<unsigned, 4> LdBases; 1214 SmallVector<unsigned, 4> StBases; 1215 1216 unsigned Loc = 0; 1217 MachineBasicBlock::iterator MBBI = MBB->begin(); 1218 MachineBasicBlock::iterator E = MBB->end(); 1219 while (MBBI != E) { 1220 for (; MBBI != E; ++MBBI) { 1221 MachineInstr *MI = MBBI; 1222 const TargetInstrDesc &TID = MI->getDesc(); 1223 if (TID.isCall() || TID.isTerminator()) { 1224 // Stop at barriers. 
1225 ++MBBI; 1226 break; 1227 } 1228 1229 MI2LocMap[MI] = Loc++; 1230 if (!isMemoryOp(MI)) 1231 continue; 1232 unsigned PredReg = 0; 1233 if (getInstrPredicate(MI, PredReg) != ARMCC::AL) 1234 continue; 1235 1236 int Opcode = MI->getOpcode(); 1237 bool isLd = Opcode == ARM::LDR || 1238 Opcode == ARM::FLDS || Opcode == ARM::FLDD; 1239 unsigned Base = MI->getOperand(1).getReg(); 1240 int Offset = getMemoryOpOffset(MI); 1241 1242 bool StopHere = false; 1243 if (isLd) { 1244 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1245 Base2LdsMap.find(Base); 1246 if (BI != Base2LdsMap.end()) { 1247 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1248 if (Offset == getMemoryOpOffset(BI->second[i])) { 1249 StopHere = true; 1250 break; 1251 } 1252 } 1253 if (!StopHere) 1254 BI->second.push_back(MI); 1255 } else { 1256 SmallVector<MachineInstr*, 4> MIs; 1257 MIs.push_back(MI); 1258 Base2LdsMap[Base] = MIs; 1259 LdBases.push_back(Base); 1260 } 1261 } else { 1262 DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = 1263 Base2StsMap.find(Base); 1264 if (BI != Base2StsMap.end()) { 1265 for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { 1266 if (Offset == getMemoryOpOffset(BI->second[i])) { 1267 StopHere = true; 1268 break; 1269 } 1270 } 1271 if (!StopHere) 1272 BI->second.push_back(MI); 1273 } else { 1274 SmallVector<MachineInstr*, 4> MIs; 1275 MIs.push_back(MI); 1276 Base2StsMap[Base] = MIs; 1277 StBases.push_back(Base); 1278 } 1279 } 1280 1281 if (StopHere) { 1282 // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. 1283 --Loc; 1284 break; 1285 } 1286 } 1287 1288 // Re-schedule loads. 1289 for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { 1290 unsigned Base = LdBases[i]; 1291 SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; 1292 if (Lds.size() > 1) 1293 RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); 1294 } 1295 1296 // Re-schedule stores. 
1297 for (unsigned i = 0, e = StBases.size(); i != e; ++i) { 1298 unsigned Base = StBases[i]; 1299 SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; 1300 if (Sts.size() > 1) 1301 RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); 1302 } 1303 1304 if (MBBI != E) { 1305 Base2LdsMap.clear(); 1306 Base2StsMap.clear(); 1307 LdBases.clear(); 1308 StBases.clear(); 1309 } 1310 } 1311 1312 return RetVal; 1313} 1314 1315 1316/// createARMLoadStoreOptimizationPass - returns an instance of the load / store 1317/// optimization pass. 1318FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { 1319 if (PreAlloc) 1320 return new ARMPreAllocLoadStoreOpt(); 1321 return new ARMLoadStoreOpt(); 1322} 1323