ARMBaseInstrInfo.cpp revision 8239daf7c83a65a189c352cce3191cdc3bbfe151
//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
    Subtarget(STI) {
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const TargetInstrDesc &TID = MI->getDesc();
  unsigned NumOps = TID.getNumOperands();
  bool isLoad = !TID.mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default:
    assert(false && "Unknown indexed op!");
    return NULL;
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
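        // (An ARM so_imm operand encodes an 8-bit value rotated right by an
        // even amount; ARM_AM::getSOImmVal returns -1 when Amt has no such
        // encoding.)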
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.getReg() &&
          TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
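            // (NewMIs[0] is inserted last below, so j == 0 visits the later
            // instruction in program order first; the first one found that
            // reads Reg inherits the kill.)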
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

bool
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL;
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    bool isKill = true;

    // Add the callee-saved register as live-in unless it's LR and
    // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
    // then it's already added to the function and entry block live-in sets.
    if (Reg == ARM::LR) {
      MachineFunction &MF = *MBB.getParent();
      if (MF.getFrameInfo()->isReturnAddressTaken() &&
          MF.getRegInfo().isLiveIn(Reg))
        isKill = false;
    }

    if (isKill)
      MBB.addLiveIn(Reg);

    // Insert the spill to the stack frame. The register is killed at the
    // spill.
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    storeRegToStackSlot(MBB, MI, Reg, isKill,
                        CSI[i].getFrameIdx(), RC, TRI);
  }
  return true;
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isUnpredicatedTerminator(I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(LastInst->getOperand(1));
      Cond.push_back(LastInst->getOperand(2));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
        // Return now; the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
    return true;

  // If the block ends with a B and a Bcc, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    Cond.push_back(SecondLastInst->getOperand(1));
    Cond.push_back(SecondLastInst->getOperand(2));
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it. The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with a branch table followed by an unconditional
  // branch. The branch folder can create these, and we must get rid of them
  // for correctness of Thumb constant islands.
  if ((isJumpTableBranchOpcode(SecondLastOpc) ||
       isIndirectBranchOpcode(SecondLastOpc)) &&
      isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) // Unconditional branch?
      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
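  // (For example, with Cond == { ARMCC::EQ, CPSR } this emits a "beq TBB"
  // followed by an unconditional "b FBB"; the exact opcodes depend on the
  // ARM/Thumb mode selected above.)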
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  // FIXME: This confuses implicit_def with optional CPSR def.
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
    return false;

  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isPredicable())
    return false;

  if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
  }
  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  // Basic size info comes from the TSFlags field.
  const TargetInstrDesc &TID = MI->getDesc();
  uint64_t TSFlags = TID.TSFlags;

  unsigned Opc = MI->getOpcode();
  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
  default: {
    // If this machine instr is an inline asm, measure it.
    if (MI->getOpcode() == ARM::INLINEASM)
      return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
    if (MI->isLabel())
      return 0;
    switch (Opc) {
    default:
      llvm_unreachable("Unknown or unset size field for instr!");
    case TargetOpcode::IMPLICIT_DEF:
    case TargetOpcode::KILL:
    case TargetOpcode::PROLOG_LABEL:
    case TargetOpcode::EH_LABEL:
    case TargetOpcode::DBG_VALUE:
      return 0;
    }
    break;
  }
  case ARMII::Size8Bytes: return 8; // ARM instruction x 2.
  case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction.
  case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
  case ARMII::SizeSpecial: {
    switch (Opc) {
    case ARM::MOVi32imm:
    case ARM::t2MOVi32imm:
      return 8;
    case ARM::CONSTPOOL_ENTRY:
      // If this machine instr is a constant pool entry, its size is recorded
      // as operand #2.
      return MI->getOperand(2).getImm();
    case ARM::Int_eh_sjlj_longjmp:
      return 16;
    case ARM::tInt_eh_sjlj_longjmp:
      return 10;
    case ARM::Int_eh_sjlj_setjmp:
    case ARM::Int_eh_sjlj_setjmp_nofp:
      return 20;
    case ARM::tInt_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp:
    case ARM::t2Int_eh_sjlj_setjmp_nofp:
      return 12;
    case ARM::BR_JTr:
    case ARM::BR_JTm:
    case ARM::BR_JTadd:
    case ARM::tBR_JTr:
    case ARM::t2BR_JT:
    case ARM::t2TBB:
    case ARM::t2TBH: {
      // These are jumptable branches, i.e. a branch followed by an inlined
      // jumptable. The size is 4 + 4 * number of entries. For TBB, each
      // entry is one byte; for TBH, two bytes each.
      unsigned EntrySize = (Opc == ARM::t2TBB)
        ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
      unsigned NumOps = TID.getNumOperands();
      MachineOperand JTOP =
        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
      unsigned JTI = JTOP.getIndex();
      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
      assert(MJTI != 0);
      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
      assert(JTI < JT.size());
      // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
      // aligned. The assembler / linker may add 2 bytes of padding just
      // before the JT entries. The size does not include this padding; the
      // constant islands pass does separate bookkeeping for it.
      // FIXME: If we know the size of the function is less than (1 << 16) * 2
      // bytes, we can use 16-bit entries instead. Then there won't be an
      // alignment issue.
      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
      unsigned NumEntries = getNumJTEntries(JT, JTI);
      if (Opc == ARM::t2TBB && (NumEntries & 1))
        // Make sure the instruction that follows TBB is 2-byte aligned.
        // FIXME: Constant island pass should insert an "ALIGN" instruction
        // instead.
        ++NumEntries;
      return NumEntries * EntrySize + InstSize;
    }
    default:
      // Otherwise, pseudo-instruction sizes are zero.
      return 0;
    }
  }
  }
  return 0; // Not reached
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQ;
  else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQ;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVQQQQ;
  else
    llvm_unreachable("Impossible reg-to-reg copy");

  MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
  MIB.addReg(SrcReg, getKillRegState(KillSrc));
  if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
    AddDefaultPred(MIB);
}

static const
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
                             unsigned Reg, unsigned SubIdx, unsigned State,
                             const TargetRegisterInfo *TRI) {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo(
                                        PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers. Just treat it as GPR here. Likewise, rGPR.
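  // (A note on these classes: at this revision tGPR is roughly the
  // Thumb1-addressable R0-R7, tcGPR the tail-call subset, and rGPR everything
  // but SP and PC. All of them spill and reload exactly like plain GPRs, so
  // they are collapsed into GPR first.)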
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      // FIXME: It's possible to only store part of the QQ register if the
      // spilled def has a sub-register index.
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
                       .addFrameIndex(FI)
                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
    MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
    AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
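    // A reg+reg-form store refers to a stack slot only when the base is a
    // frame index and both the index register and the shift immediate are
    // zero, i.e. a plain [FrameIndex] store.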
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSpill:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64Pseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQ:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  // tGPR is used sometimes in ARM instructions that need to avoid using
  // certain registers. Just treat it as GPR here. Likewise, rGPR.
  if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
      || RC == ARM::rGPRRegisterClass)
    RC = ARM::GPRRegisterClass;

  switch (RC->getID()) {
  case ARM::GPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::SPRRegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::DPRRegClassID:
  case ARM::DPR_VFP2RegClassID:
  case ARM::DPR_8RegClassID:
    AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    break;
  case ARM::QPRRegClassID:
  case ARM::QPR_VFP2RegClassID:
  case ARM::QPR_8RegClassID:
    if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
                     .addMemOperand(MMO));
    }
    break;
  case ARM::QQPRRegClassID:
  case ARM::QQPR_VFP2RegClassID:
    if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
    } else {
      MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
                       .addFrameIndex(FI)
                       .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
        .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
      AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    }
    break;
  case ARM::QQQQPRRegClassID: {
    MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
                     .addFrameIndex(FI)
                     .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
      .addMemOperand(MMO);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
    MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
    AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
    break;
  }
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
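    // Same pattern as isStoreToStackSlot above: a reg+reg-form load is a
    // stack-slot reload only if its index register and shift immediate are
    // both zero.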
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tRestore:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64Pseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQ:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == ARM_AM::getAM4ModeImm(ARM_AM::ia) &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

MachineInstr*
ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
                                           int FrameIx, uint64_t Offset,
                                           const MDNode *MDPtr,
                                           DebugLoc DL) const {
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
    .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
  return &*MIB;
}

/// Create a copy of a const pool value. Update CPI to the new index and
/// return the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *NewCPV = 0;
  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all Thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
                                      ARMCP::CPValue, 4);
  else if (ACPV->isExtSymbol())
    NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
                                      ACPV->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
                                      ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
                                      ARMCP::CPLSDA, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");
  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
  return PCLabelId;
}

void ARMBaseInstrInfo::
reMaterialize(MachineBasicBlock &MBB,
              MachineBasicBlock::iterator I,
              unsigned DestReg, unsigned SubIdx,
              const MachineInstr *Orig,
              const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig->getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
                                      DestReg)
      .addConstantPoolIndex(CPI).addImm(PCLabelId);
    (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
    break;
  }
  }
}

MachineInstr *
ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
  switch(Orig->getOpcode()) {
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    unsigned CPI = Orig->getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    Orig->getOperand(1).setIndex(CPI);
    Orig->getOperand(2).setImm(PCLabelId);
    break;
  }
  }
  return MI;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
                                        const MachineInstr *MI1) const {
  int Opcode = MI0->getOpcode();
  if (Opcode == ARM::t2LDRpci ||
      Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci ||
      Opcode == ARM::tLDRpci_pic) {
    if (MI1->getOpcode() != Opcode)
      return false;
    if (MI0->getNumOperands() != MI1->getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0->getOperand(1);
    const MachineOperand &MO1 = MI1->getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    const MachineFunction *MF = MI0->getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    ARMConstantPoolValue *ACPV0 =
      static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
    ARMConstantPoolValue *ACPV1 =
      static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
    return ACPV0->hasSameValue(ACPV1);
  }

  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}

/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only
/// difference between the two addresses is the offset. It also returns the
/// offsets by reference.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  switch (Load1->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  switch (Load2->getMachineOpcode()) {
  default:
    return false;
  case ARM::LDRi12:
  case ARM::LDRBi12:
  case ARM::LDRD:
  case ARM::LDRH:
  case ARM::LDRSB:
  case ARM::LDRSH:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::t2LDRi8:
  case ARM::t2LDRDi8:
  case ARM::t2LDRSHi8:
  case ARM::t2LDRi12:
  case ARM::t2LDRSHi12:
    break;
  }

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}

/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
/// should be scheduled together. On some targets if two loads are loading
/// from addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load
/// offsets from the common base address. It returns true if it decides it's
/// desirable to schedule the two loads together. "NumLoads" is the number of
/// loads that have already been scheduled after Load1.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
    return false; // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI->isDebugValue())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI->getDesc().isTerminator() || MI->isLabel())
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any dbg_value instructions.
  while (++I != MBB->end() && I->isDebugValue())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  if (MI->definesRegister(ARM::SP))
    return true;

  return false;
}

bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                           unsigned NumCycles,
                                           unsigned ExtraPredCycles,
                                           float Probability,
                                           float Confidence) const {
  if (!NumCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * NumCycles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();

  return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TMBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FMBB,
                    unsigned FCycles, unsigned FExtra,
                    float Probability, float Confidence) const {
  if (!TCycles || !FCycles)
    return false;

  // Attempt to estimate the relative costs of predication versus branching.
  float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
  UnpredCost += 1.0; // The branch itself
  UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();

  return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI->getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
}


int llvm::getMatchingCondBranchOpcode(int Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  else if (Opc == ARM::tB)
    return ARM::tBcc;
  else if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
  return 0;
}


void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                               unsigned DestReg, unsigned BaseReg, int NumBytes,
                               ARMCC::CondCodes Pred, unsigned PredReg,
                               const ARMBaseInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
    BaseReg = DestReg;
  }
}

bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                unsigned FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const TargetInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.RemoveOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp.
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
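    // (getSOImmValRotate picks a rotation that captures the most significant
    // set bits, so the 8-bit chunk masked off above is always encodable; the
    // assert below checks that invariant.)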
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12: {
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode2: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    }
    case ARMII::AddrMode3: {
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    }
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5: {
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    }
    default:
      llvm_unreachable("Unsupported addressing mode!");
      break;
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold the address computation if the opcode has offset bits.
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp.
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}

bool ARMBaseInstrInfo::
AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask,
               int &CmpValue) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::CMPzri:
  case ARM::t2CMPri:
  case ARM::t2CMPzri:
    SrcReg = MI->getOperand(0).getReg();
    CmpMask = ~0;
    CmpValue = MI->getOperand(1).getImm();
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    SrcReg = MI->getOperand(0).getReg();
    CmpMask = MI->getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}

/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  case ARM::COPY: {
    // Walk down one instruction which is potentially an 'and'.
    const MachineInstr &Copy = *MI;
    MachineBasicBlock::iterator AND(
      llvm::next(MachineBasicBlock::iterator(MI)));
    if (AND == MI->getParent()->end()) return false;
    MI = AND;
    return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
                             CmpMask, true);
  }
  }

  return false;
}

/// OptimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
/// Update the iterator *only* if a transformation took place.
bool ARMBaseInstrInfo::
OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
                     int CmpValue, const MachineRegisterInfo *MRI,
                     MachineBasicBlock::iterator &MII) const {
  if (CmpValue != 0)
    return false;

  MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
  if (llvm::next(DI) != MRI->def_end())
    // Only support one definition.
    return false;

  MachineInstr *MI = &*DI;

  // Masked compares sometimes use the same register as the corresponding
  // 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) {
      MI = 0;
      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
           UE = MRI->use_end(); UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr->getParent()) continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Conservatively refuse to convert an instruction which isn't in the same
  // BB as the comparison.
  if (MI->getParent() != CmpInstr->getParent())
    return false;

  // Check that CPSR isn't set between the comparison instruction and the one
  // we want to change.
  MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
    B = MI->getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  --I;
  for (; I != E; --I) {
    const MachineInstr &Instr = *I;

    for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
      const MachineOperand &MO = Instr.getOperand(IO);
      if (!MO.isReg()) continue;

      // This instruction modifies or uses CPSR after the one we want to
      // change. We can't do this transformation.
      if (MO.getReg() == ARM::CPSR)
        return false;
    }

    if (I == B)
      // The 'and' is below the comparison instruction.
      return false;
  }

  // Set the "zero" bit in CPSR.
  switch (MI->getOpcode()) {
  default: break;
  case ARM::ADDri:
  case ARM::ANDri:
  case ARM::t2ANDri:
  case ARM::SUBri:
  case ARM::t2ADDri:
  case ARM::t2SUBri:
    MI->RemoveOperand(5);
    MachineInstrBuilder(MI)
      .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
    MII = llvm::next(MachineBasicBlock::iterator(CmpInstr));
    CmpInstr->eraseFromParent();
    return true;
  }

  return false;
}

unsigned
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                 const MachineInstr *MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const TargetInstrDesc &Desc = MI->getDesc();
  unsigned Class = Desc.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (UOps)
    return UOps;

  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
    break;
  case ARM::VLDMQ:
  case ARM::VSTMQ:
    return 2;

  // The number of uOps for load / store multiple is determined by the number
  // of registers.
  // On Cortex-A8, each pair of register loads / stores can be scheduled on
  // the same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the
  // address is not 64-bit aligned, then AGU would take an extra cycle.
  // For VFP / NEON load / store multiple, the formula is
  // (#reg / 2) + (#reg % 2) + 1.
  case ARM::VLDMD:
  case ARM::VLDMS:
  case ARM::VLDMD_UPD:
  case ARM::VLDMS_UPD:
  case ARM::VSTMD:
  case ARM::VSTMS:
  case ARM::VSTMD_UPD:
  case ARM::VSTMS_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
    return (NumRegs / 2) + (NumRegs % 2) + 1;
  }
  case ARM::LDM_RET:
  case ARM::LDM:
  case ARM::LDM_UPD:
  case ARM::STM:
  case ARM::STM_UPD:
  case ARM::tLDM:
  case ARM::tLDM_UPD:
  case ARM::tSTM_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::tPUSH:
  case ARM::t2LDM_RET:
  case ARM::t2LDM:
  case ARM::t2LDM_UPD:
  case ARM::t2STM:
  case ARM::t2STM_UPD: {
    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
    if (Subtarget.isCortexA8()) {
      if (NumRegs < 4)
        return 2;
      // 4 registers would be issued: 2, 2.
      // 5 registers would be issued: 2, 2, 1.
      UOps = (NumRegs / 2);
      if (NumRegs % 2)
        ++UOps;
      return UOps;
    } else if (Subtarget.isCortexA9()) {
      UOps = (NumRegs / 2);
      // If there are an odd number of registers or if it's not 64-bit
      // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
      if ((NumRegs % 2) ||
          !MI->hasOneMemOperand() ||
          (*MI->memoperands_begin())->getAlignment() < 8)
        ++UOps;
      return UOps;
    } else {
      // Assume the worst.
      return NumRegs;
    }
  }
  }
}

int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
                                  const TargetInstrDesc &DefTID,
                                  unsigned DefClass,
                                  unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    DefCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++DefCycle;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = RegNo;
    bool isSLoad = false;
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::VLDMS:
    case ARM::VLDMS_UPD:
      isSLoad = true;
      break;
    }
    // If there are an odd number of 'S' registers or if it's not 64-bit
    // aligned, then it takes an extra cycle.
    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
      ++DefCycle;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
                                 const TargetInstrDesc &DefTID,
                                 unsigned DefClass,
                                 unsigned DefIdx, unsigned DefAlign) const {
  int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
  if (RegNo <= 0)
    // Def is the address writeback.
    return ItinData->getOperandCycle(DefClass, DefIdx);

  int DefCycle;
  if (Subtarget.isCortexA8()) {
    // 4 registers would be issued: 1, 2, 1.
    // 5 registers would be issued: 1, 2, 2.
    DefCycle = RegNo / 2;
    if (DefCycle < 1)
      DefCycle = 1;
    // Result latency is issue cycle + 2: E2.
    DefCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    DefCycle = (RegNo / 2);
    // If there are an odd number of registers or if it's not 64-bit aligned,
    // then it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || DefAlign < 8)
      ++DefCycle;
    // Result latency is AGU cycles + 2.
    DefCycle += 2;
  } else {
    // Assume the worst.
    DefCycle = RegNo + 2;
  }

  return DefCycle;
}

int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
                                  const TargetInstrDesc &UseTID,
                                  unsigned UseClass,
                                  unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    // (regno / 2) + (regno % 2) + 1
    UseCycle = RegNo / 2 + 1;
    if (RegNo % 2)
      ++UseCycle;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = RegNo;
    bool isSStore = false;
    switch (UseTID.getOpcode()) {
    default: break;
    case ARM::VSTMS:
    case ARM::VSTMS_UPD:
      isSStore = true;
      break;
    }
    // If there are an odd number of 'S' registers or if it's not 64-bit
    // aligned, then it takes an extra cycle.
    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = RegNo + 2;
  }

  return UseCycle;
}

int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
                                 const TargetInstrDesc &UseTID,
                                 unsigned UseClass,
                                 unsigned UseIdx, unsigned UseAlign) const {
  int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
  if (RegNo <= 0)
    return ItinData->getOperandCycle(UseClass, UseIdx);

  int UseCycle;
  if (Subtarget.isCortexA8()) {
    UseCycle = RegNo / 2;
    if (UseCycle < 2)
      UseCycle = 2;
    // Read in E3.
    UseCycle += 2;
  } else if (Subtarget.isCortexA9()) {
    UseCycle = (RegNo / 2);
    // If there is an odd number of registers, or if the store is not 64-bit
    // aligned, it takes an extra AGU (Address Generation Unit) cycle.
    if ((RegNo % 2) || UseAlign < 8)
      ++UseCycle;
  } else {
    // Assume the worst.
    UseCycle = 1;
  }
  return UseCycle;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const TargetInstrDesc &DefTID,
                                    unsigned DefIdx, unsigned DefAlign,
                                    const TargetInstrDesc &UseTID,
                                    unsigned UseIdx, unsigned UseAlign) const {
  unsigned DefClass = DefTID.getSchedClass();
  unsigned UseClass = UseTID.getSchedClass();

  if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);

  // This may be a def / use of a variable_ops instruction, in which case the
  // operand latency might be determinable dynamically. Let the target try to
  // figure it out.
  int DefCycle = -1;
  bool LdmBypass = false;
  switch (DefTID.getOpcode()) {
  default:
    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    break;
  case ARM::VLDMD:
  case ARM::VLDMS:
  case ARM::VLDMD_UPD:
  case ARM::VLDMS_UPD: {
    DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
    break;
  }
  case ARM::LDM_RET:
  case ARM::LDM:
  case ARM::LDM_UPD:
  case ARM::tLDM:
  case ARM::tLDM_UPD:
  case ARM::tPUSH:
  case ARM::t2LDM_RET:
  case ARM::t2LDM:
  case ARM::t2LDM_UPD: {
    LdmBypass = true;
    DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
    break;
  }
  }

  if (DefCycle == -1)
    // We can't seem to determine the result latency of the def, assume it's 2.
    DefCycle = 2;

  int UseCycle = -1;
  switch (UseTID.getOpcode()) {
  default:
    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
    break;
  case ARM::VSTMD:
  case ARM::VSTMS:
  case ARM::VSTMD_UPD:
  case ARM::VSTMS_UPD: {
    UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
    break;
  }
  case ARM::STM:
  case ARM::STM_UPD:
  case ARM::tSTM_UPD:
  case ARM::tPOP_RET:
  case ARM::tPOP:
  case ARM::t2STM:
  case ARM::t2STM_UPD: {
    UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
    break;
  }
  }

  if (UseCycle == -1)
    // Assume it's read in the first stage.
    UseCycle = 1;

  UseCycle = DefCycle - UseCycle + 1;
  if (UseCycle > 0) {
    if (LdmBypass) {
      // It's a variable_ops instruction, so we can't use DefIdx here. Just
      // use the first def operand.
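      // If the itinerary defines a forwarding (bypass) path from the def's
      // write stage to the use's read stage, the value is available one
      // cycle earlier than the operand cycles alone suggest.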
      if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
                                          UseClass, UseIdx))
        --UseCycle;
    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
                                               UseClass, UseIdx))
      --UseCycle;
  }

  return UseCycle;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr *DefMI, unsigned DefIdx,
                                    const MachineInstr *UseMI, unsigned UseIdx) const {
  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
      DefMI->isRegSequence() || DefMI->isImplicitDef())
    return 1;

  const TargetInstrDesc &DefTID = DefMI->getDesc();
  if (!ItinData || ItinData->isEmpty())
    return DefTID.mayLoad() ? 3 : 1;

  const TargetInstrDesc &UseTID = UseMI->getDesc();
  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
  if (DefMO.getReg() == ARM::CPSR) {
    if (DefMI->getOpcode() == ARM::FMSTAT) {
      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
      return Subtarget.isCortexA9() ? 1 : 20;
    }

    // CPSR set and branch can be paired in the same cycle.
    if (UseTID.isBranch())
      return 0;
  }

  unsigned DefAlign = DefMI->hasOneMemOperand()
    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
  unsigned UseAlign = UseMI->hasOneMemOperand()
    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
                                  UseTID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal = DefMI->getOperand(3).getImm();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt = DefMI->getOperand(3).getImm();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  return Latency;
}

int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    SDNode *DefNode, unsigned DefIdx,
                                    SDNode *UseNode, unsigned UseIdx) const {
  if (!DefNode->isMachineOpcode())
    return 1;

  const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
  if (!ItinData || ItinData->isEmpty())
    return DefTID.mayLoad() ? 3 : 1;

  if (!UseNode->isMachineOpcode()) {
    int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
    if (Subtarget.isCortexA9())
      return Latency <= 2 ? 1 : Latency - 1;
    else
      return Latency <= 3 ? 1 : Latency - 2;
  }

  const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
  // Both nodes have machine opcodes, so they must be MachineSDNodes.
  const MachineSDNode *DefMN = cast<MachineSDNode>(DefNode);
  unsigned DefAlign = !DefMN->memoperands_empty()
    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
  const MachineSDNode *UseMN = cast<MachineSDNode>(UseNode);
  unsigned UseAlign = !UseMN->memoperands_empty()
    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
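  // An alignment of zero means the alignment is unknown; the per-core
  // heuristics treat anything below 8 bytes as not 64-bit aligned.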
  int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
                                  UseTID, UseIdx, UseAlign);

  if (Latency > 1 &&
      (Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
    // variants are one cycle cheaper.
    switch (DefTID.getOpcode()) {
    default: break;
    case ARM::LDRrs:
    case ARM::LDRBrs: {
      unsigned ShOpVal =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
      if (ShImm == 0 ||
          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
        --Latency;
      break;
    }
    case ARM::t2LDRs:
    case ARM::t2LDRBs:
    case ARM::t2LDRHs:
    case ARM::t2LDRSHs: {
      // Thumb2 mode: lsl only.
      unsigned ShAmt =
        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
      if (ShAmt == 0 || ShAmt == 2)
        --Latency;
      break;
    }
    }
  }

  return Latency;
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      const MachineInstr *MI,
                                      unsigned *PredCost) const {
  if (MI->isCopyLike() || MI->isInsertSubreg() ||
      MI->isRegSequence() || MI->isImplicitDef())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  const TargetInstrDesc &TID = MI->getDesc();
  unsigned Class = TID.getSchedClass();
  unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
  if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
    // When predicated, CPSR is an additional source operand for CPSR
    // updating instructions; this apparently increases their latencies.
    *PredCost = 1;
  if (UOps)
    return ItinData->getStageLatency(Class);
  return getNumMicroOps(ItinData, MI);
}

int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                      SDNode *Node) const {
  if (!Node->isMachineOpcode())
    return 1;

  if (!ItinData || ItinData->isEmpty())
    return 1;

  unsigned Opcode = Node->getMachineOpcode();
  switch (Opcode) {
  default:
    return ItinData->getStageLatency(get(Opcode).getSchedClass());
  case ARM::VLDMQ:
  case ARM::VSTMQ:
    return 2;
  }
}

bool ARMBaseInstrInfo::
hasHighOperandLatency(const InstrItineraryData *ItinData,
                      const MachineRegisterInfo *MRI,
                      const MachineInstr *DefMI, unsigned DefIdx,
                      const MachineInstr *UseMI, unsigned UseIdx) const {
  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
  if (Subtarget.isCortexA8() &&
      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
    // Cortex-A8 VFP instructions are not pipelined.
    return true;

  // Hoist VFP / NEON instructions with 4 or higher latency.
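  // This uses the MachineInstr overload of getOperandLatency, so the
  // shifter-op adjustments above are already reflected in the result.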
  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
  if (Latency <= 3)
    return false;
  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
}

bool ARMBaseInstrInfo::
hasLowDefLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *DefMI, unsigned DefIdx) const {
  if (!ItinData || ItinData->isEmpty())
    return false;

  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
  if (DDomain == ARMII::DomainGeneral) {
    unsigned DefClass = DefMI->getDesc().getSchedClass();
    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
    return (DefCycle != -1 && DefCycle <= 2);
  }
  return false;
}