ARMBaseInstrInfo.cpp revision a4e3c7fc4ba2d55695b0484480685698132eba20
1//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the Base ARM implementation of the TargetInstrInfo class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ARMBaseInstrInfo.h" 15#include "ARM.h" 16#include "ARMConstantPoolValue.h" 17#include "ARMHazardRecognizer.h" 18#include "ARMMachineFunctionInfo.h" 19#include "ARMRegisterInfo.h" 20#include "MCTargetDesc/ARMAddressingModes.h" 21#include "llvm/Constants.h" 22#include "llvm/Function.h" 23#include "llvm/GlobalValue.h" 24#include "llvm/CodeGen/LiveVariables.h" 25#include "llvm/CodeGen/MachineConstantPool.h" 26#include "llvm/CodeGen/MachineFrameInfo.h" 27#include "llvm/CodeGen/MachineInstrBuilder.h" 28#include "llvm/CodeGen/MachineJumpTableInfo.h" 29#include "llvm/CodeGen/MachineMemOperand.h" 30#include "llvm/CodeGen/MachineRegisterInfo.h" 31#include "llvm/CodeGen/SelectionDAGNodes.h" 32#include "llvm/MC/MCAsmInfo.h" 33#include "llvm/Support/BranchProbability.h" 34#include "llvm/Support/CommandLine.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/ErrorHandling.h" 37#include "llvm/ADT/STLExtras.h" 38 39#define GET_INSTRINFO_CTOR 40#include "ARMGenInstrInfo.inc" 41 42using namespace llvm; 43 44static cl::opt<bool> 45EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, 46 cl::desc("Enable ARM 2-addr to 3-addr conv")); 47 48static cl::opt<bool> 49WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), 50 cl::desc("Widen ARM vmovs to vmovd when possible")); 51 52/// ARM_MLxEntry - Record information about MLA / MLS instructions. 53struct ARM_MLxEntry { 54 unsigned MLxOpc; // MLA / MLS opcode 55 unsigned MulOpc; // Expanded multiplication opcode 56 unsigned AddSubOpc; // Expanded add / sub opcode 57 bool NegAcc; // True if the acc is negated before the add / sub. 58 bool HasLane; // True if instruction has an extra "lane" operand. 
59}; 60 61static const ARM_MLxEntry ARM_MLxTable[] = { 62 // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane 63 // fp scalar ops 64 { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false }, 65 { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false }, 66 { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false }, 67 { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false }, 68 { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false }, 69 { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false }, 70 { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false }, 71 { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false }, 72 73 // fp SIMD ops 74 { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false }, 75 { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false }, 76 { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false }, 77 { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false }, 78 { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true }, 79 { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true }, 80 { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true }, 81 { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true }, 82}; 83 84ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) 85 : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), 86 Subtarget(STI) { 87 for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { 88 if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) 89 assert(false && "Duplicated entries?"); 90 MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); 91 MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); 92 } 93} 94 95// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl 96// currently defaults to no prepass hazard recognizer. 97ScheduleHazardRecognizer *ARMBaseInstrInfo:: 98CreateTargetHazardRecognizer(const TargetMachine *TM, 99 const ScheduleDAG *DAG) const { 100 if (usePreRAHazardRecognizer()) { 101 const InstrItineraryData *II = TM->getInstrItineraryData(); 102 return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); 103 } 104 return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); 105} 106 107ScheduleHazardRecognizer *ARMBaseInstrInfo:: 108CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 109 const ScheduleDAG *DAG) const { 110 if (Subtarget.isThumb2() || Subtarget.hasVFP2()) 111 return (ScheduleHazardRecognizer *) 112 new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); 113 return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); 114} 115 116MachineInstr * 117ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, 118 MachineBasicBlock::iterator &MBBI, 119 LiveVariables *LV) const { 120 // FIXME: Thumb2 support. 121 122 if (!EnableARM3Addr) 123 return NULL; 124 125 MachineInstr *MI = MBBI; 126 MachineFunction &MF = *MI->getParent()->getParent(); 127 uint64_t TSFlags = MI->getDesc().TSFlags; 128 bool isPre = false; 129 switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { 130 default: return NULL; 131 case ARMII::IndexModePre: 132 isPre = true; 133 break; 134 case ARMII::IndexModePost: 135 break; 136 } 137 138 // Try splitting an indexed load/store to an un-indexed one plus an add/sub 139 // operation. 
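  // The split produces two instructions: UpdateMI computes the write-back register with an explicit ADD / SUB, and MemMI performs the access itself through the un-indexed opcode.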
140 unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); 141 if (MemOpc == 0) 142 return NULL; 143 144 MachineInstr *UpdateMI = NULL; 145 MachineInstr *MemMI = NULL; 146 unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); 147 const MCInstrDesc &MCID = MI->getDesc(); 148 unsigned NumOps = MCID.getNumOperands(); 149 bool isLoad = !MI->mayStore(); 150 const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); 151 const MachineOperand &Base = MI->getOperand(2); 152 const MachineOperand &Offset = MI->getOperand(NumOps-3); 153 unsigned WBReg = WB.getReg(); 154 unsigned BaseReg = Base.getReg(); 155 unsigned OffReg = Offset.getReg(); 156 unsigned OffImm = MI->getOperand(NumOps-2).getImm(); 157 ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); 158 switch (AddrMode) { 159 default: 160 assert(false && "Unknown indexed op!"); 161 return NULL; 162 case ARMII::AddrMode2: { 163 bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; 164 unsigned Amt = ARM_AM::getAM2Offset(OffImm); 165 if (OffReg == 0) { 166 if (ARM_AM::getSOImmVal(Amt) == -1) 167 // Can't encode it in a so_imm operand. This transformation will 168 // add more than 1 instruction. Abandon! 169 return NULL; 170 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 171 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 172 .addReg(BaseReg).addImm(Amt) 173 .addImm(Pred).addReg(0).addReg(0); 174 } else if (Amt != 0) { 175 ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); 176 unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); 177 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 178 get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg) 179 .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) 180 .addImm(Pred).addReg(0).addReg(0); 181 } else 182 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 183 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 184 .addReg(BaseReg).addReg(OffReg) 185 .addImm(Pred).addReg(0).addReg(0); 186 break; 187 } 188 case ARMII::AddrMode3 : { 189 bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; 190 unsigned Amt = ARM_AM::getAM3Offset(OffImm); 191 if (OffReg == 0) 192 // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. 193 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 194 get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) 195 .addReg(BaseReg).addImm(Amt) 196 .addImm(Pred).addReg(0).addReg(0); 197 else 198 UpdateMI = BuildMI(MF, MI->getDebugLoc(), 199 get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) 200 .addReg(BaseReg).addReg(OffReg) 201 .addImm(Pred).addReg(0).addReg(0); 202 break; 203 } 204 } 205 206 std::vector<MachineInstr*> NewMIs; 207 if (isPre) { 208 if (isLoad) 209 MemMI = BuildMI(MF, MI->getDebugLoc(), 210 get(MemOpc), MI->getOperand(0).getReg()) 211 .addReg(WBReg).addImm(0).addImm(Pred); 212 else 213 MemMI = BuildMI(MF, MI->getDebugLoc(), 214 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 215 .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); 216 NewMIs.push_back(MemMI); 217 NewMIs.push_back(UpdateMI); 218 } else { 219 if (isLoad) 220 MemMI = BuildMI(MF, MI->getDebugLoc(), 221 get(MemOpc), MI->getOperand(0).getReg()) 222 .addReg(BaseReg).addImm(0).addImm(Pred); 223 else 224 MemMI = BuildMI(MF, MI->getDebugLoc(), 225 get(MemOpc)).addReg(MI->getOperand(1).getReg()) 226 .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); 227 if (WB.isDead()) 228 UpdateMI->getOperand(0).setIsDead(); 229 NewMIs.push_back(UpdateMI); 230 NewMIs.push_back(MemMI); 231 } 232 233 // Transfer LiveVariables states, kill / dead info. 
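  // The original indexed instruction is about to be deleted, so its kill / dead flags must be transferred to whichever of UpdateMI / MemMI now defines or last reads each virtual register.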
234 if (LV) { 235 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 236 MachineOperand &MO = MI->getOperand(i); 237 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 238 unsigned Reg = MO.getReg(); 239 240 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 241 if (MO.isDef()) { 242 MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; 243 if (MO.isDead()) 244 LV->addVirtualRegisterDead(Reg, NewMI); 245 } 246 if (MO.isUse() && MO.isKill()) { 247 for (unsigned j = 0; j < 2; ++j) { 248 // Look at the two new MI's in reverse order. 249 MachineInstr *NewMI = NewMIs[j]; 250 if (!NewMI->readsRegister(Reg)) 251 continue; 252 LV->addVirtualRegisterKilled(Reg, NewMI); 253 if (VI.removeKill(MI)) 254 VI.Kills.push_back(NewMI); 255 break; 256 } 257 } 258 } 259 } 260 } 261 262 MFI->insert(MBBI, NewMIs[1]); 263 MFI->insert(MBBI, NewMIs[0]); 264 return NewMIs[0]; 265} 266 267// Branch analysis. 268bool 269ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, 270 MachineBasicBlock *&FBB, 271 SmallVectorImpl<MachineOperand> &Cond, 272 bool AllowModify) const { 273 // If the block has no terminators, it just falls into the block after it. 274 MachineBasicBlock::iterator I = MBB.end(); 275 if (I == MBB.begin()) 276 return false; 277 --I; 278 while (I->isDebugValue()) { 279 if (I == MBB.begin()) 280 return false; 281 --I; 282 } 283 if (!isUnpredicatedTerminator(I)) 284 return false; 285 286 // Get the last instruction in the block. 287 MachineInstr *LastInst = I; 288 289 // If there is only one terminator instruction, process it. 290 unsigned LastOpc = LastInst->getOpcode(); 291 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 292 if (isUncondBranchOpcode(LastOpc)) { 293 TBB = LastInst->getOperand(0).getMBB(); 294 return false; 295 } 296 if (isCondBranchOpcode(LastOpc)) { 297 // Block ends with fall-through condbranch. 298 TBB = LastInst->getOperand(0).getMBB(); 299 Cond.push_back(LastInst->getOperand(1)); 300 Cond.push_back(LastInst->getOperand(2)); 301 return false; 302 } 303 return true; // Can't handle indirect branch. 304 } 305 306 // Get the instruction before it if it is a terminator. 307 MachineInstr *SecondLastInst = I; 308 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 309 310 // If AllowModify is true and the block ends with two or more unconditional 311 // branches, delete all but the first unconditional branch. 312 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 313 while (isUncondBranchOpcode(SecondLastOpc)) { 314 LastInst->eraseFromParent(); 315 LastInst = SecondLastInst; 316 LastOpc = LastInst->getOpcode(); 317 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 318 // Return now the only terminator is an unconditional branch. 319 TBB = LastInst->getOperand(0).getMBB(); 320 return false; 321 } else { 322 SecondLastInst = I; 323 SecondLastOpc = SecondLastInst->getOpcode(); 324 } 325 } 326 } 327 328 // If there are three terminators, we don't know what sort of block this is. 329 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) 330 return true; 331 332 // If the block ends with a B and a Bcc, handle it. 333 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 334 TBB = SecondLastInst->getOperand(0).getMBB(); 335 Cond.push_back(SecondLastInst->getOperand(1)); 336 Cond.push_back(SecondLastInst->getOperand(2)); 337 FBB = LastInst->getOperand(0).getMBB(); 338 return false; 339 } 340 341 // If the block ends with two unconditional branches, handle it. 
The second 342 // one is not executed, so remove it. 343 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 344 TBB = SecondLastInst->getOperand(0).getMBB(); 345 I = LastInst; 346 if (AllowModify) 347 I->eraseFromParent(); 348 return false; 349 } 350 351 // ...likewise if it ends with a branch table followed by an unconditional 352 // branch. The branch folder can create these, and we must get rid of them for 353 // correctness of Thumb constant islands. 354 if ((isJumpTableBranchOpcode(SecondLastOpc) || 355 isIndirectBranchOpcode(SecondLastOpc)) && 356 isUncondBranchOpcode(LastOpc)) { 357 I = LastInst; 358 if (AllowModify) 359 I->eraseFromParent(); 360 return true; 361 } 362 363 // Otherwise, can't handle this. 364 return true; 365} 366 367 368unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 369 MachineBasicBlock::iterator I = MBB.end(); 370 if (I == MBB.begin()) return 0; 371 --I; 372 while (I->isDebugValue()) { 373 if (I == MBB.begin()) 374 return 0; 375 --I; 376 } 377 if (!isUncondBranchOpcode(I->getOpcode()) && 378 !isCondBranchOpcode(I->getOpcode())) 379 return 0; 380 381 // Remove the branch. 382 I->eraseFromParent(); 383 384 I = MBB.end(); 385 386 if (I == MBB.begin()) return 1; 387 --I; 388 if (!isCondBranchOpcode(I->getOpcode())) 389 return 1; 390 391 // Remove the branch. 392 I->eraseFromParent(); 393 return 2; 394} 395 396unsigned 397ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 398 MachineBasicBlock *FBB, 399 const SmallVectorImpl<MachineOperand> &Cond, 400 DebugLoc DL) const { 401 ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); 402 int BOpc = !AFI->isThumbFunction() 403 ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); 404 int BccOpc = !AFI->isThumbFunction() 405 ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); 406 bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function(); 407 408 // Shouldn't be a fall through. 409 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 410 assert((Cond.size() == 2 || Cond.size() == 0) && 411 "ARM branch conditions have two components!"); 412 413 if (FBB == 0) { 414 if (Cond.empty()) { // Unconditional branch? 415 if (isThumb) 416 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); 417 else 418 BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); 419 } else 420 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 421 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 422 return 1; 423 } 424 425 // Two-way conditional branch. 
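  // Emit the conditional branch to TBB first, then the unconditional branch to FBB; Thumb unconditional branches still carry an AL predicate operand.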
426 BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) 427 .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); 428 if (isThumb) 429 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0); 430 else 431 BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); 432 return 2; 433} 434 435bool ARMBaseInstrInfo:: 436ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 437 ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); 438 Cond[0].setImm(ARMCC::getOppositeCondition(CC)); 439 return false; 440} 441 442bool ARMBaseInstrInfo:: 443PredicateInstruction(MachineInstr *MI, 444 const SmallVectorImpl<MachineOperand> &Pred) const { 445 unsigned Opc = MI->getOpcode(); 446 if (isUncondBranchOpcode(Opc)) { 447 MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); 448 MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); 449 MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); 450 return true; 451 } 452 453 int PIdx = MI->findFirstPredOperandIdx(); 454 if (PIdx != -1) { 455 MachineOperand &PMO = MI->getOperand(PIdx); 456 PMO.setImm(Pred[0].getImm()); 457 MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); 458 return true; 459 } 460 return false; 461} 462 463bool ARMBaseInstrInfo:: 464SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 465 const SmallVectorImpl<MachineOperand> &Pred2) const { 466 if (Pred1.size() > 2 || Pred2.size() > 2) 467 return false; 468 469 ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); 470 ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); 471 if (CC1 == CC2) 472 return true; 473 474 switch (CC1) { 475 default: 476 return false; 477 case ARMCC::AL: 478 return true; 479 case ARMCC::HS: 480 return CC2 == ARMCC::HI; 481 case ARMCC::LS: 482 return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; 483 case ARMCC::GE: 484 return CC2 == ARMCC::GT; 485 case ARMCC::LE: 486 return CC2 == ARMCC::LT; 487 } 488} 489 490bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, 491 std::vector<MachineOperand> &Pred) const { 492 // FIXME: This confuses implicit_def with optional CPSR def. 493 const MCInstrDesc &MCID = MI->getDesc(); 494 if (!MCID.getImplicitDefs() && !MI->hasOptionalDef()) 495 return false; 496 497 bool Found = false; 498 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 499 const MachineOperand &MO = MI->getOperand(i); 500 if (MO.isReg() && MO.getReg() == ARM::CPSR) { 501 Pred.push_back(MO); 502 Found = true; 503 } 504 } 505 506 return Found; 507} 508 509/// isPredicable - Return true if the specified instruction can be predicated. 510/// By default, this returns true for every instruction with a 511/// PredicateOperand. 512bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { 513 if (!MI->isPredicable()) 514 return false; 515 516 if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { 517 ARMFunctionInfo *AFI = 518 MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); 519 return AFI->isThumb2Function(); 520 } 521 return true; 522} 523 524/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. 525LLVM_ATTRIBUTE_NOINLINE 526static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 527 unsigned JTI); 528static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, 529 unsigned JTI) { 530 assert(JTI < JT.size()); 531 return JT[JTI].MBBs.size(); 532} 533 534/// GetInstSize - Return the size of the specified MachineInstr. 
535 ///
536 unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
537 const MachineBasicBlock &MBB = *MI->getParent();
538 const MachineFunction *MF = MBB.getParent();
539 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
540
541 const MCInstrDesc &MCID = MI->getDesc();
542 if (MCID.getSize())
543 return MCID.getSize();
544
545 // If this machine instr is an inline asm, measure it.
546 if (MI->getOpcode() == ARM::INLINEASM)
547 return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
548 if (MI->isLabel())
549 return 0;
550 unsigned Opc = MI->getOpcode();
551 switch (Opc) {
552 case TargetOpcode::IMPLICIT_DEF:
553 case TargetOpcode::KILL:
554 case TargetOpcode::PROLOG_LABEL:
555 case TargetOpcode::EH_LABEL:
556 case TargetOpcode::DBG_VALUE:
557 return 0;
558 case ARM::MOVi16_ga_pcrel:
559 case ARM::MOVTi16_ga_pcrel:
560 case ARM::t2MOVi16_ga_pcrel:
561 case ARM::t2MOVTi16_ga_pcrel:
562 return 4;
563 case ARM::MOVi32imm:
564 case ARM::t2MOVi32imm:
565 return 8;
566 case ARM::CONSTPOOL_ENTRY:
567 // If this machine instr is a constant pool entry, its size is recorded as
568 // operand #2.
569 return MI->getOperand(2).getImm();
570 case ARM::Int_eh_sjlj_longjmp:
571 return 16;
572 case ARM::tInt_eh_sjlj_longjmp:
573 return 10;
574 case ARM::Int_eh_sjlj_setjmp:
575 case ARM::Int_eh_sjlj_setjmp_nofp:
576 return 20;
577 case ARM::tInt_eh_sjlj_setjmp:
578 case ARM::t2Int_eh_sjlj_setjmp:
579 case ARM::t2Int_eh_sjlj_setjmp_nofp:
580 return 12;
581 case ARM::BR_JTr:
582 case ARM::BR_JTm:
583 case ARM::BR_JTadd:
584 case ARM::tBR_JTr:
585 case ARM::t2BR_JT:
586 case ARM::t2TBB_JT:
587 case ARM::t2TBH_JT: {
588 // These are jumptable branches, i.e. a branch followed by an inlined
589 // jumptable. The size is 4 + 4 * number of entries. For TBB, each
590 // entry is one byte; TBH entries are two bytes each.
591 unsigned EntrySize = (Opc == ARM::t2TBB_JT)
592 ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
593 unsigned NumOps = MCID.getNumOperands();
594 MachineOperand JTOP =
595 MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
596 unsigned JTI = JTOP.getIndex();
597 const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
598 assert(MJTI != 0);
599 const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
600 assert(JTI < JT.size());
601 // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
602 // aligned. The assembler / linker may add 2 byte padding just before
603 // the JT entries. The size does not include this padding; the
604 // constant islands pass does separate bookkeeping for it.
605 // FIXME: If we know the size of the function is less than (1 << 16) *2
606 // bytes, we can use 16-bit entries instead. Then there won't be an
607 // alignment issue.
608 unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
609 unsigned NumEntries = getNumJTEntries(JT, JTI);
610 if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
611 // Make sure the instruction that follows TBB is 2-byte aligned.
612 // FIXME: Constant island pass should insert an "ALIGN" instruction
613 // instead.
614 ++NumEntries;
615 return NumEntries * EntrySize + InstSize;
616 }
617 default:
618 // Otherwise, pseudo-instruction sizes are zero.
619 return 0; 620 } 621 return 0; // Not reached 622} 623 624void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 625 MachineBasicBlock::iterator I, DebugLoc DL, 626 unsigned DestReg, unsigned SrcReg, 627 bool KillSrc) const { 628 bool GPRDest = ARM::GPRRegClass.contains(DestReg); 629 bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); 630 631 if (GPRDest && GPRSrc) { 632 AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) 633 .addReg(SrcReg, getKillRegState(KillSrc)))); 634 return; 635 } 636 637 bool SPRDest = ARM::SPRRegClass.contains(DestReg); 638 bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); 639 640 unsigned Opc = 0; 641 if (SPRDest && SPRSrc) 642 Opc = ARM::VMOVS; 643 else if (GPRDest && SPRSrc) 644 Opc = ARM::VMOVRS; 645 else if (SPRDest && GPRSrc) 646 Opc = ARM::VMOVSR; 647 else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) 648 Opc = ARM::VMOVD; 649 else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) 650 Opc = ARM::VORRq; 651 652 if (Opc) { 653 MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); 654 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 655 if (Opc == ARM::VORRq) 656 MIB.addReg(SrcReg, getKillRegState(KillSrc)); 657 AddDefaultPred(MIB); 658 return; 659 } 660 661 // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. 662 if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || 663 ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { 664 const TargetRegisterInfo *TRI = &getRegisterInfo(); 665 assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); 666 unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? 667 ARM::qsub_1 : ARM::qsub_3; 668 for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { 669 unsigned Dst = TRI->getSubReg(DestReg, i); 670 unsigned Src = TRI->getSubReg(SrcReg, i); 671 MachineInstrBuilder Mov = 672 AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) 673 .addReg(Dst, RegState::Define) 674 .addReg(Src, getKillRegState(KillSrc)) 675 .addReg(Src, getKillRegState(KillSrc))); 676 if (i == EndSubReg) { 677 Mov->addRegisterDefined(DestReg, TRI); 678 if (KillSrc) 679 Mov->addRegisterKilled(SrcReg, TRI); 680 } 681 } 682 return; 683 } 684 llvm_unreachable("Impossible reg-to-reg copy"); 685} 686 687static const 688MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, 689 unsigned Reg, unsigned SubIdx, unsigned State, 690 const TargetRegisterInfo *TRI) { 691 if (!SubIdx) 692 return MIB.addReg(Reg, State); 693 694 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 695 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 696 return MIB.addReg(Reg, State, SubIdx); 697} 698 699void ARMBaseInstrInfo:: 700storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 701 unsigned SrcReg, bool isKill, int FI, 702 const TargetRegisterClass *RC, 703 const TargetRegisterInfo *TRI) const { 704 DebugLoc DL; 705 if (I != MBB.end()) DL = I->getDebugLoc(); 706 MachineFunction &MF = *MBB.getParent(); 707 MachineFrameInfo &MFI = *MF.getFrameInfo(); 708 unsigned Align = MFI.getObjectAlignment(FI); 709 710 MachineMemOperand *MMO = 711 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 712 MachineMemOperand::MOStore, 713 MFI.getObjectSize(FI), 714 Align); 715 716 switch (RC->getSize()) { 717 case 4: 718 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 719 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) 720 .addReg(SrcReg, getKillRegState(isKill)) 721 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 722 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 723 
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) 724 .addReg(SrcReg, getKillRegState(isKill)) 725 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 726 } else 727 llvm_unreachable("Unknown reg class!"); 728 break; 729 case 8: 730 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 731 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) 732 .addReg(SrcReg, getKillRegState(isKill)) 733 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 734 } else 735 llvm_unreachable("Unknown reg class!"); 736 break; 737 case 16: 738 if (ARM::QPRRegClass.hasSubClassEq(RC)) { 739 if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { 740 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) 741 .addFrameIndex(FI).addImm(16) 742 .addReg(SrcReg, getKillRegState(isKill)) 743 .addMemOperand(MMO)); 744 } else { 745 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) 746 .addReg(SrcReg, getKillRegState(isKill)) 747 .addFrameIndex(FI) 748 .addMemOperand(MMO)); 749 } 750 } else 751 llvm_unreachable("Unknown reg class!"); 752 break; 753 case 32: 754 if (ARM::QQPRRegClass.hasSubClassEq(RC)) { 755 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 756 // FIXME: It's possible to only store part of the QQ register if the 757 // spilled def has a sub-register index. 758 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) 759 .addFrameIndex(FI).addImm(16) 760 .addReg(SrcReg, getKillRegState(isKill)) 761 .addMemOperand(MMO)); 762 } else { 763 MachineInstrBuilder MIB = 764 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 765 .addFrameIndex(FI)) 766 .addMemOperand(MMO); 767 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 768 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 769 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 770 AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 771 } 772 } else 773 llvm_unreachable("Unknown reg class!"); 774 break; 775 case 64: 776 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 777 MachineInstrBuilder MIB = 778 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) 779 .addFrameIndex(FI)) 780 .addMemOperand(MMO); 781 MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); 782 MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); 783 MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); 784 MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); 785 MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); 786 MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); 787 MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); 788 AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); 789 } else 790 llvm_unreachable("Unknown reg class!"); 791 break; 792 default: 793 llvm_unreachable("Unknown reg class!"); 794 } 795} 796 797unsigned 798ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, 799 int &FrameIndex) const { 800 switch (MI->getOpcode()) { 801 default: break; 802 case ARM::STRrs: 803 case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 
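    // A register-offset store only refers to a plain stack slot when the offset register is 0 and the immediate is 0; anything else is not treated as a simple spill.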
804 if (MI->getOperand(1).isFI() && 805 MI->getOperand(2).isReg() && 806 MI->getOperand(3).isImm() && 807 MI->getOperand(2).getReg() == 0 && 808 MI->getOperand(3).getImm() == 0) { 809 FrameIndex = MI->getOperand(1).getIndex(); 810 return MI->getOperand(0).getReg(); 811 } 812 break; 813 case ARM::STRi12: 814 case ARM::t2STRi12: 815 case ARM::tSTRspi: 816 case ARM::VSTRD: 817 case ARM::VSTRS: 818 if (MI->getOperand(1).isFI() && 819 MI->getOperand(2).isImm() && 820 MI->getOperand(2).getImm() == 0) { 821 FrameIndex = MI->getOperand(1).getIndex(); 822 return MI->getOperand(0).getReg(); 823 } 824 break; 825 case ARM::VST1q64Pseudo: 826 if (MI->getOperand(0).isFI() && 827 MI->getOperand(2).getSubReg() == 0) { 828 FrameIndex = MI->getOperand(0).getIndex(); 829 return MI->getOperand(2).getReg(); 830 } 831 break; 832 case ARM::VSTMQIA: 833 if (MI->getOperand(1).isFI() && 834 MI->getOperand(0).getSubReg() == 0) { 835 FrameIndex = MI->getOperand(1).getIndex(); 836 return MI->getOperand(0).getReg(); 837 } 838 break; 839 } 840 841 return 0; 842} 843 844unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, 845 int &FrameIndex) const { 846 const MachineMemOperand *Dummy; 847 return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex); 848} 849 850void ARMBaseInstrInfo:: 851loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, 852 unsigned DestReg, int FI, 853 const TargetRegisterClass *RC, 854 const TargetRegisterInfo *TRI) const { 855 DebugLoc DL; 856 if (I != MBB.end()) DL = I->getDebugLoc(); 857 MachineFunction &MF = *MBB.getParent(); 858 MachineFrameInfo &MFI = *MF.getFrameInfo(); 859 unsigned Align = MFI.getObjectAlignment(FI); 860 MachineMemOperand *MMO = 861 MF.getMachineMemOperand( 862 MachinePointerInfo::getFixedStack(FI), 863 MachineMemOperand::MOLoad, 864 MFI.getObjectSize(FI), 865 Align); 866 867 switch (RC->getSize()) { 868 case 4: 869 if (ARM::GPRRegClass.hasSubClassEq(RC)) { 870 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) 871 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 872 873 } else if (ARM::SPRRegClass.hasSubClassEq(RC)) { 874 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) 875 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 876 } else 877 llvm_unreachable("Unknown reg class!"); 878 break; 879 case 8: 880 if (ARM::DPRRegClass.hasSubClassEq(RC)) { 881 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) 882 .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); 883 } else 884 llvm_unreachable("Unknown reg class!"); 885 break; 886 case 16: 887 if (ARM::QPRRegClass.hasSubClassEq(RC)) { 888 if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { 889 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) 890 .addFrameIndex(FI).addImm(16) 891 .addMemOperand(MMO)); 892 } else { 893 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) 894 .addFrameIndex(FI) 895 .addMemOperand(MMO)); 896 } 897 } else 898 llvm_unreachable("Unknown reg class!"); 899 break; 900 case 32: 901 if (ARM::QQPRRegClass.hasSubClassEq(RC)) { 902 if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { 903 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) 904 .addFrameIndex(FI).addImm(16) 905 .addMemOperand(MMO)); 906 } else { 907 MachineInstrBuilder MIB = 908 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 909 .addFrameIndex(FI)) 910 .addMemOperand(MMO); 911 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); 912 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, 
RegState::Define, TRI); 913 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); 914 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); 915 MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 916 } 917 } else 918 llvm_unreachable("Unknown reg class!"); 919 break; 920 case 64: 921 if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) { 922 MachineInstrBuilder MIB = 923 AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) 924 .addFrameIndex(FI)) 925 .addMemOperand(MMO); 926 MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); 927 MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); 928 MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); 929 MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); 930 MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); 931 MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); 932 MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); 933 MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); 934 MIB.addReg(DestReg, RegState::Define | RegState::Implicit); 935 } else 936 llvm_unreachable("Unknown reg class!"); 937 break; 938 default: 939 llvm_unreachable("Unknown regclass!"); 940 } 941} 942 943unsigned 944ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 945 int &FrameIndex) const { 946 switch (MI->getOpcode()) { 947 default: break; 948 case ARM::LDRrs: 949 case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame. 950 if (MI->getOperand(1).isFI() && 951 MI->getOperand(2).isReg() && 952 MI->getOperand(3).isImm() && 953 MI->getOperand(2).getReg() == 0 && 954 MI->getOperand(3).getImm() == 0) { 955 FrameIndex = MI->getOperand(1).getIndex(); 956 return MI->getOperand(0).getReg(); 957 } 958 break; 959 case ARM::LDRi12: 960 case ARM::t2LDRi12: 961 case ARM::tLDRspi: 962 case ARM::VLDRD: 963 case ARM::VLDRS: 964 if (MI->getOperand(1).isFI() && 965 MI->getOperand(2).isImm() && 966 MI->getOperand(2).getImm() == 0) { 967 FrameIndex = MI->getOperand(1).getIndex(); 968 return MI->getOperand(0).getReg(); 969 } 970 break; 971 case ARM::VLD1q64Pseudo: 972 if (MI->getOperand(1).isFI() && 973 MI->getOperand(0).getSubReg() == 0) { 974 FrameIndex = MI->getOperand(1).getIndex(); 975 return MI->getOperand(0).getReg(); 976 } 977 break; 978 case ARM::VLDMQIA: 979 if (MI->getOperand(1).isFI() && 980 MI->getOperand(0).getSubReg() == 0) { 981 FrameIndex = MI->getOperand(1).getIndex(); 982 return MI->getOperand(0).getReg(); 983 } 984 break; 985 } 986 987 return 0; 988} 989 990unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, 991 int &FrameIndex) const { 992 const MachineMemOperand *Dummy; 993 return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); 994} 995 996bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ 997 // This hook gets to expand COPY instructions before they become 998 // copyPhysReg() calls. Look for VMOVS instructions that can legally be 999 // widened to VMOVD. We prefer the VMOVD when possible because it may be 1000 // changed into a VORR that can go down the NEON pipeline. 1001 if (!WidenVMOVS || !MI->isCopy()) 1002 return false; 1003 1004 // Look for a copy between even S-registers. That is where we keep floats 1005 // when using NEON v2f32 instructions for f32 arithmetic. 
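  // Both operands must be S registers that occupy the low half (ssub_0) of a D register; getMatchingSuperReg returns 0 for odd-numbered S registers, so those copies are rejected below.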
1006 unsigned DstRegS = MI->getOperand(0).getReg(); 1007 unsigned SrcRegS = MI->getOperand(1).getReg(); 1008 if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS)) 1009 return false; 1010 1011 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1012 unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0, 1013 &ARM::DPRRegClass); 1014 unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0, 1015 &ARM::DPRRegClass); 1016 if (!DstRegD || !SrcRegD) 1017 return false; 1018 1019 // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only 1020 // legal if the COPY already defines the full DstRegD, and it isn't a 1021 // sub-register insertion. 1022 if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI)) 1023 return false; 1024 1025 // A dead copy shouldn't show up here, but reject it just in case. 1026 if (MI->getOperand(0).isDead()) 1027 return false; 1028 1029 // All clear, widen the COPY. 1030 DEBUG(dbgs() << "widening: " << *MI); 1031 1032 // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg 1033 // or some other super-register. 1034 int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD); 1035 if (ImpDefIdx != -1) 1036 MI->RemoveOperand(ImpDefIdx); 1037 1038 // Change the opcode and operands. 1039 MI->setDesc(get(ARM::VMOVD)); 1040 MI->getOperand(0).setReg(DstRegD); 1041 MI->getOperand(1).setReg(SrcRegD); 1042 AddDefaultPred(MachineInstrBuilder(MI)); 1043 1044 // We are now reading SrcRegD instead of SrcRegS. This may upset the 1045 // register scavenger and machine verifier, so we need to indicate that we 1046 // are reading an undefined value from SrcRegD, but a proper value from 1047 // SrcRegS. 1048 MI->getOperand(1).setIsUndef(); 1049 MachineInstrBuilder(MI).addReg(SrcRegS, RegState::Implicit); 1050 1051 // SrcRegD may actually contain an unrelated value in the ssub_1 1052 // sub-register. Don't kill it. Only kill the ssub_0 sub-register. 1053 if (MI->getOperand(1).isKill()) { 1054 MI->getOperand(1).setIsKill(false); 1055 MI->addRegisterKilled(SrcRegS, TRI, true); 1056 } 1057 1058 DEBUG(dbgs() << "replaced by: " << *MI); 1059 return true; 1060} 1061 1062MachineInstr* 1063ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 1064 int FrameIx, uint64_t Offset, 1065 const MDNode *MDPtr, 1066 DebugLoc DL) const { 1067 MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) 1068 .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); 1069 return &*MIB; 1070} 1071 1072/// Create a copy of a const pool value. Update CPI to the new index and return 1073/// the label UID. 1074static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 1075 MachineConstantPool *MCP = MF.getConstantPool(); 1076 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1077 1078 const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; 1079 assert(MCPE.isMachineConstantPoolEntry() && 1080 "Expecting a machine constantpool entry!"); 1081 ARMConstantPoolValue *ACPV = 1082 static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); 1083 1084 unsigned PCLabelId = AFI->createPICLabelUId(); 1085 ARMConstantPoolValue *NewCPV = 0; 1086 // FIXME: The below assumes PIC relocation model and that the function 1087 // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and 1088 // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR 1089 // instructions, so that's probably OK, but is PIC always correct when 1090 // we get here? 
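  // Clone the ARM-specific constant pool value, attaching the newly created PC label UID so the duplicated load gets its own pc-relative constant pool entry.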
1091 if (ACPV->isGlobalValue()) 1092 NewCPV = ARMConstantPoolConstant:: 1093 Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, 1094 ARMCP::CPValue, 4); 1095 else if (ACPV->isExtSymbol()) 1096 NewCPV = ARMConstantPoolSymbol:: 1097 Create(MF.getFunction()->getContext(), 1098 cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); 1099 else if (ACPV->isBlockAddress()) 1100 NewCPV = ARMConstantPoolConstant:: 1101 Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, 1102 ARMCP::CPBlockAddress, 4); 1103 else if (ACPV->isLSDA()) 1104 NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, 1105 ARMCP::CPLSDA, 4); 1106 else if (ACPV->isMachineBasicBlock()) 1107 NewCPV = ARMConstantPoolMBB:: 1108 Create(MF.getFunction()->getContext(), 1109 cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); 1110 else 1111 llvm_unreachable("Unexpected ARM constantpool value type!!"); 1112 CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); 1113 return PCLabelId; 1114} 1115 1116void ARMBaseInstrInfo:: 1117reMaterialize(MachineBasicBlock &MBB, 1118 MachineBasicBlock::iterator I, 1119 unsigned DestReg, unsigned SubIdx, 1120 const MachineInstr *Orig, 1121 const TargetRegisterInfo &TRI) const { 1122 unsigned Opcode = Orig->getOpcode(); 1123 switch (Opcode) { 1124 default: { 1125 MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); 1126 MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); 1127 MBB.insert(I, MI); 1128 break; 1129 } 1130 case ARM::tLDRpci_pic: 1131 case ARM::t2LDRpci_pic: { 1132 MachineFunction &MF = *MBB.getParent(); 1133 unsigned CPI = Orig->getOperand(1).getIndex(); 1134 unsigned PCLabelId = duplicateCPV(MF, CPI); 1135 MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), 1136 DestReg) 1137 .addConstantPoolIndex(CPI).addImm(PCLabelId); 1138 MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); 1139 break; 1140 } 1141 } 1142} 1143 1144MachineInstr * 1145ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { 1146 MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); 1147 switch(Orig->getOpcode()) { 1148 case ARM::tLDRpci_pic: 1149 case ARM::t2LDRpci_pic: { 1150 unsigned CPI = Orig->getOperand(1).getIndex(); 1151 unsigned PCLabelId = duplicateCPV(MF, CPI); 1152 Orig->getOperand(1).setIndex(CPI); 1153 Orig->getOperand(2).setImm(PCLabelId); 1154 break; 1155 } 1156 } 1157 return MI; 1158} 1159 1160bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, 1161 const MachineInstr *MI1, 1162 const MachineRegisterInfo *MRI) const { 1163 int Opcode = MI0->getOpcode(); 1164 if (Opcode == ARM::t2LDRpci || 1165 Opcode == ARM::t2LDRpci_pic || 1166 Opcode == ARM::tLDRpci || 1167 Opcode == ARM::tLDRpci_pic || 1168 Opcode == ARM::MOV_ga_dyn || 1169 Opcode == ARM::MOV_ga_pcrel || 1170 Opcode == ARM::MOV_ga_pcrel_ldr || 1171 Opcode == ARM::t2MOV_ga_dyn || 1172 Opcode == ARM::t2MOV_ga_pcrel) { 1173 if (MI1->getOpcode() != Opcode) 1174 return false; 1175 if (MI0->getNumOperands() != MI1->getNumOperands()) 1176 return false; 1177 1178 const MachineOperand &MO0 = MI0->getOperand(1); 1179 const MachineOperand &MO1 = MI1->getOperand(1); 1180 if (MO0.getOffset() != MO1.getOffset()) 1181 return false; 1182 1183 if (Opcode == ARM::MOV_ga_dyn || 1184 Opcode == ARM::MOV_ga_pcrel || 1185 Opcode == ARM::MOV_ga_pcrel_ldr || 1186 Opcode == ARM::t2MOV_ga_dyn || 1187 Opcode == ARM::t2MOV_ga_pcrel) 1188 // Ignore the PC labels. 
1189 return MO0.getGlobal() == MO1.getGlobal(); 1190 1191 const MachineFunction *MF = MI0->getParent()->getParent(); 1192 const MachineConstantPool *MCP = MF->getConstantPool(); 1193 int CPI0 = MO0.getIndex(); 1194 int CPI1 = MO1.getIndex(); 1195 const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; 1196 const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; 1197 bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); 1198 bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); 1199 if (isARMCP0 && isARMCP1) { 1200 ARMConstantPoolValue *ACPV0 = 1201 static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); 1202 ARMConstantPoolValue *ACPV1 = 1203 static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); 1204 return ACPV0->hasSameValue(ACPV1); 1205 } else if (!isARMCP0 && !isARMCP1) { 1206 return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; 1207 } 1208 return false; 1209 } else if (Opcode == ARM::PICLDR) { 1210 if (MI1->getOpcode() != Opcode) 1211 return false; 1212 if (MI0->getNumOperands() != MI1->getNumOperands()) 1213 return false; 1214 1215 unsigned Addr0 = MI0->getOperand(1).getReg(); 1216 unsigned Addr1 = MI1->getOperand(1).getReg(); 1217 if (Addr0 != Addr1) { 1218 if (!MRI || 1219 !TargetRegisterInfo::isVirtualRegister(Addr0) || 1220 !TargetRegisterInfo::isVirtualRegister(Addr1)) 1221 return false; 1222 1223 // This assumes SSA form. 1224 MachineInstr *Def0 = MRI->getVRegDef(Addr0); 1225 MachineInstr *Def1 = MRI->getVRegDef(Addr1); 1226 // Check if the loaded value, e.g. a constantpool of a global address, are 1227 // the same. 1228 if (!produceSameValue(Def0, Def1, MRI)) 1229 return false; 1230 } 1231 1232 for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { 1233 // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg 1234 const MachineOperand &MO0 = MI0->getOperand(i); 1235 const MachineOperand &MO1 = MI1->getOperand(i); 1236 if (!MO0.isIdenticalTo(MO1)) 1237 return false; 1238 } 1239 return true; 1240 } 1241 1242 return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); 1243} 1244 1245/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to 1246/// determine if two loads are loading from the same base address. It should 1247/// only return true if the base pointers are the same and the only differences 1248/// between the two addresses is the offset. It also returns the offsets by 1249/// reference. 1250bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, 1251 int64_t &Offset1, 1252 int64_t &Offset2) const { 1253 // Don't worry about Thumb: just ARM and Thumb2. 
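  // Thumb1 loads are not covered by the opcode lists below, so bail out early.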
1254 if (Subtarget.isThumb1Only()) return false;
1255
1256 if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1257 return false;
1258
1259 switch (Load1->getMachineOpcode()) {
1260 default:
1261 return false;
1262 case ARM::LDRi12:
1263 case ARM::LDRBi12:
1264 case ARM::LDRD:
1265 case ARM::LDRH:
1266 case ARM::LDRSB:
1267 case ARM::LDRSH:
1268 case ARM::VLDRD:
1269 case ARM::VLDRS:
1270 case ARM::t2LDRi8:
1271 case ARM::t2LDRDi8:
1272 case ARM::t2LDRSHi8:
1273 case ARM::t2LDRi12:
1274 case ARM::t2LDRSHi12:
1275 break;
1276 }
1277
1278 switch (Load2->getMachineOpcode()) {
1279 default:
1280 return false;
1281 case ARM::LDRi12:
1282 case ARM::LDRBi12:
1283 case ARM::LDRD:
1284 case ARM::LDRH:
1285 case ARM::LDRSB:
1286 case ARM::LDRSH:
1287 case ARM::VLDRD:
1288 case ARM::VLDRS:
1289 case ARM::t2LDRi8:
1290 case ARM::t2LDRDi8:
1291 case ARM::t2LDRSHi8:
1292 case ARM::t2LDRi12:
1293 case ARM::t2LDRSHi12:
1294 break;
1295 }
1296
1297 // Check if base addresses and chain operands match.
1298 if (Load1->getOperand(0) != Load2->getOperand(0) ||
1299 Load1->getOperand(4) != Load2->getOperand(4))
1300 return false;
1301
1302 // Index should be Reg0.
1303 if (Load1->getOperand(3) != Load2->getOperand(3))
1304 return false;
1305
1306 // Determine the offsets.
1307 if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1308 isa<ConstantSDNode>(Load2->getOperand(1))) {
1309 Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1310 Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1311 return true;
1312 }
1313
1314 return false;
1315 }
1316
1317 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1318 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1319 /// be scheduled together. On some targets if two loads are loading from
1320 /// addresses in the same cache line, it's better if they are scheduled
1321 /// together. This function takes two integers that represent the load offsets
1322 /// from the common base address. It returns true if it decides it's desirable
1323 /// to schedule the two loads together. "NumLoads" is the number of loads that
1324 /// have already been scheduled after Load1.
1325 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1326 int64_t Offset1, int64_t Offset2,
1327 unsigned NumLoads) const {
1328 // Don't worry about Thumb: just ARM and Thumb2.
1329 if (Subtarget.isThumb1Only()) return false;
1330
1331 assert(Offset2 > Offset1);
1332
1333 if ((Offset2 - Offset1) / 8 > 64)
1334 return false;
1335
1336 if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
1337 return false; // FIXME: overly conservative?
1338
1339 // Four loads in a row should be sufficient.
1340 if (NumLoads >= 3)
1341 return false;
1342
1343 return true;
1344 }
1345
1346 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1347 const MachineBasicBlock *MBB,
1348 const MachineFunction &MF) const {
1349 // Debug info is never a scheduling boundary. It's necessary to be explicit
1350 // due to the special treatment of IT instructions below, otherwise a
1351 // dbg_value followed by an IT will result in the IT instruction being
1352 // considered a scheduling hazard, which is wrong. It should be the actual
1353 // instruction preceding the dbg_value instruction(s), just like it is
1354 // when debug info is not present.
1355 if (MI->isDebugValue())
1356 return false;
1357
1358 // Terminators and labels can't be scheduled around.
1359 if (MI->isTerminator() || MI->isLabel()) 1360 return true; 1361 1362 // Treat the start of the IT block as a scheduling boundary, but schedule 1363 // t2IT along with all instructions following it. 1364 // FIXME: This is a big hammer. But the alternative is to add all potential 1365 // true and anti dependencies to IT block instructions as implicit operands 1366 // to the t2IT instruction. The added compile time and complexity does not 1367 // seem worth it. 1368 MachineBasicBlock::const_iterator I = MI; 1369 // Make sure to skip any dbg_value instructions 1370 while (++I != MBB->end() && I->isDebugValue()) 1371 ; 1372 if (I != MBB->end() && I->getOpcode() == ARM::t2IT) 1373 return true; 1374 1375 // Don't attempt to schedule around any instruction that defines 1376 // a stack-oriented pointer, as it's unlikely to be profitable. This 1377 // saves compile time, because it doesn't require every single 1378 // stack slot reference to depend on the instruction that does the 1379 // modification. 1380 if (MI->definesRegister(ARM::SP)) 1381 return true; 1382 1383 return false; 1384} 1385 1386bool ARMBaseInstrInfo:: 1387isProfitableToIfCvt(MachineBasicBlock &MBB, 1388 unsigned NumCycles, unsigned ExtraPredCycles, 1389 const BranchProbability &Probability) const { 1390 if (!NumCycles) 1391 return false; 1392 1393 // Attempt to estimate the relative costs of predication versus branching. 1394 unsigned UnpredCost = Probability.getNumerator() * NumCycles; 1395 UnpredCost /= Probability.getDenominator(); 1396 UnpredCost += 1; // The branch itself 1397 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1398 1399 return (NumCycles + ExtraPredCycles) <= UnpredCost; 1400} 1401 1402bool ARMBaseInstrInfo:: 1403isProfitableToIfCvt(MachineBasicBlock &TMBB, 1404 unsigned TCycles, unsigned TExtra, 1405 MachineBasicBlock &FMBB, 1406 unsigned FCycles, unsigned FExtra, 1407 const BranchProbability &Probability) const { 1408 if (!TCycles || !FCycles) 1409 return false; 1410 1411 // Attempt to estimate the relative costs of predication versus branching. 1412 unsigned TUnpredCost = Probability.getNumerator() * TCycles; 1413 TUnpredCost /= Probability.getDenominator(); 1414 1415 uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); 1416 unsigned FUnpredCost = Comp * FCycles; 1417 FUnpredCost /= Probability.getDenominator(); 1418 1419 unsigned UnpredCost = TUnpredCost + FUnpredCost; 1420 UnpredCost += 1; // The branch itself 1421 UnpredCost += Subtarget.getMispredictionPenalty() / 10; 1422 1423 return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; 1424} 1425 1426/// getInstrPredicate - If instruction is predicated, returns its predicate 1427/// condition, otherwise returns AL. It also returns the condition code 1428/// register by reference. 1429ARMCC::CondCodes 1430llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { 1431 int PIdx = MI->findFirstPredOperandIdx(); 1432 if (PIdx == -1) { 1433 PredReg = 0; 1434 return ARMCC::AL; 1435 } 1436 1437 PredReg = MI->getOperand(PIdx+1).getReg(); 1438 return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); 1439} 1440 1441 1442int llvm::getMatchingCondBranchOpcode(int Opc) { 1443 if (Opc == ARM::B) 1444 return ARM::Bcc; 1445 else if (Opc == ARM::tB) 1446 return ARM::tBcc; 1447 else if (Opc == ARM::t2B) 1448 return ARM::t2Bcc; 1449 1450 llvm_unreachable("Unknown unconditional branch opcode!"); 1451 return 0; 1452} 1453 1454 1455/// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
Whether the 1456/// instruction is encoded with an 'S' bit is determined by the optional CPSR 1457/// def operand. 1458/// 1459/// This will go away once we can teach tblgen how to set the optional CPSR def 1460/// operand itself. 1461struct AddSubFlagsOpcodePair { 1462 unsigned PseudoOpc; 1463 unsigned MachineOpc; 1464}; 1465 1466static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 1467 {ARM::ADDSri, ARM::ADDri}, 1468 {ARM::ADDSrr, ARM::ADDrr}, 1469 {ARM::ADDSrsi, ARM::ADDrsi}, 1470 {ARM::ADDSrsr, ARM::ADDrsr}, 1471 1472 {ARM::SUBSri, ARM::SUBri}, 1473 {ARM::SUBSrr, ARM::SUBrr}, 1474 {ARM::SUBSrsi, ARM::SUBrsi}, 1475 {ARM::SUBSrsr, ARM::SUBrsr}, 1476 1477 {ARM::RSBSri, ARM::RSBri}, 1478 {ARM::RSBSrsi, ARM::RSBrsi}, 1479 {ARM::RSBSrsr, ARM::RSBrsr}, 1480 1481 {ARM::t2ADDSri, ARM::t2ADDri}, 1482 {ARM::t2ADDSrr, ARM::t2ADDrr}, 1483 {ARM::t2ADDSrs, ARM::t2ADDrs}, 1484 1485 {ARM::t2SUBSri, ARM::t2SUBri}, 1486 {ARM::t2SUBSrr, ARM::t2SUBrr}, 1487 {ARM::t2SUBSrs, ARM::t2SUBrs}, 1488 1489 {ARM::t2RSBSri, ARM::t2RSBri}, 1490 {ARM::t2RSBSrs, ARM::t2RSBrs}, 1491}; 1492 1493unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) { 1494 static const int NPairs = 1495 sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); 1496 for (AddSubFlagsOpcodePair *OpcPair = &AddSubFlagsOpcodeMap[0], 1497 *End = &AddSubFlagsOpcodeMap[NPairs]; OpcPair != End; ++OpcPair) { 1498 if (OldOpc == OpcPair->PseudoOpc) { 1499 return OpcPair->MachineOpc; 1500 } 1501 } 1502 return 0; 1503} 1504 1505void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, 1506 MachineBasicBlock::iterator &MBBI, DebugLoc dl, 1507 unsigned DestReg, unsigned BaseReg, int NumBytes, 1508 ARMCC::CondCodes Pred, unsigned PredReg, 1509 const ARMBaseInstrInfo &TII, unsigned MIFlags) { 1510 bool isSub = NumBytes < 0; 1511 if (isSub) NumBytes = -NumBytes; 1512 1513 while (NumBytes) { 1514 unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); 1515 unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); 1516 assert(ThisVal && "Didn't extract field correctly"); 1517 1518 // We will handle these bits from offset, clear them. 1519 NumBytes &= ~ThisVal; 1520 1521 assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); 1522 1523 // Build the new ADD / SUB. 1524 unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; 1525 BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) 1526 .addReg(BaseReg, RegState::Kill).addImm(ThisVal) 1527 .addImm((unsigned)Pred).addReg(PredReg).addReg(0) 1528 .setMIFlags(MIFlags); 1529 BaseReg = DestReg; 1530 } 1531} 1532 1533bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 1534 unsigned FrameReg, int &Offset, 1535 const ARMBaseInstrInfo &TII) { 1536 unsigned Opcode = MI.getOpcode(); 1537 const MCInstrDesc &Desc = MI.getDesc(); 1538 unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); 1539 bool isSub = false; 1540 1541 // Memory operands in inline assembly always use AddrMode2. 1542 if (Opcode == ARM::INLINEASM) 1543 AddrMode = ARMII::AddrMode2; 1544 1545 if (Opcode == ARM::ADDri) { 1546 Offset += MI.getOperand(FrameRegIdx+1).getImm(); 1547 if (Offset == 0) { 1548 // Turn it into a move. 1549 MI.setDesc(TII.get(ARM::MOVr)); 1550 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 1551 MI.RemoveOperand(FrameRegIdx+1); 1552 Offset = 0; 1553 return true; 1554 } else if (Offset < 0) { 1555 Offset = -Offset; 1556 isSub = true; 1557 MI.setDesc(TII.get(ARM::SUBri)); 1558 } 1559 1560 // Common case: small offset, fits into instruction. 
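    // getSOImmVal returns -1 when the value has no valid rotated-immediate encoding; anything else folds directly into this ADDri / SUBri.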
1561 if (ARM_AM::getSOImmVal(Offset) != -1) {
1562 // Replace the FrameIndex with sp / fp
1563 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1564 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
1565 Offset = 0;
1566 return true;
1567 }
1568
1569 // Otherwise, pull as much of the immediate into this ADDri/SUBri
1570 // as possible.
1571 unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
1572 unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
1573
1574 // We will handle these bits from offset, clear them.
1575 Offset &= ~ThisImmVal;
1576
1577 // Get the properly encoded SOImmVal field.
1578 assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
1579 "Bit extraction didn't work?");
1580 MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
1581 } else {
1582 unsigned ImmIdx = 0;
1583 int InstrOffs = 0;
1584 unsigned NumBits = 0;
1585 unsigned Scale = 1;
1586 switch (AddrMode) {
1587 case ARMII::AddrMode_i12: {
1588 ImmIdx = FrameRegIdx + 1;
1589 InstrOffs = MI.getOperand(ImmIdx).getImm();
1590 NumBits = 12;
1591 break;
1592 }
1593 case ARMII::AddrMode2: {
1594 ImmIdx = FrameRegIdx+2;
1595 InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
1596 if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1597 InstrOffs *= -1;
1598 NumBits = 12;
1599 break;
1600 }
1601 case ARMII::AddrMode3: {
1602 ImmIdx = FrameRegIdx+2;
1603 InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
1604 if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1605 InstrOffs *= -1;
1606 NumBits = 8;
1607 break;
1608 }
1609 case ARMII::AddrMode4:
1610 case ARMII::AddrMode6:
1611 // Can't fold any offset even if it's zero.
1612 return false;
1613 case ARMII::AddrMode5: {
1614 ImmIdx = FrameRegIdx+1;
1615 InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
1616 if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
1617 InstrOffs *= -1;
1618 NumBits = 8;
1619 Scale = 4;
1620 break;
1621 }
1622 default:
1623 llvm_unreachable("Unsupported addressing mode!");
1624 break;
1625 }
1626
1627 Offset += InstrOffs * Scale;
1628 assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
1629 if (Offset < 0) {
1630 Offset = -Offset;
1631 isSub = true;
1632 }
1633
1634 // Attempt to fold address comp. if opcode has offset bits
1635 if (NumBits > 0) {
1636 // Common case: small offset, fits into instruction.
1637 MachineOperand &ImmOp = MI.getOperand(ImmIdx);
1638 int ImmedOffset = Offset / Scale;
1639 unsigned Mask = (1 << NumBits) - 1;
1640 if ((unsigned)Offset <= Mask * Scale) {
1641 // Replace the FrameIndex with sp
1642 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1643 // FIXME: When addrmode2 goes away, this will simplify (like the
1644 // T2 version), as the LDR.i12 versions don't need the encoding
1645 // tricks for the offset value.
1646 if (isSub) {
1647 if (AddrMode == ARMII::AddrMode_i12)
1648 ImmedOffset = -ImmedOffset;
1649 else
1650 ImmedOffset |= 1 << NumBits;
1651 }
1652 ImmOp.ChangeToImmediate(ImmedOffset);
1653 Offset = 0;
1654 return true;
1655 }
1656
1657 // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
1658 ImmedOffset = ImmedOffset & Mask;
1659 if (isSub) {
1660 if (AddrMode == ARMII::AddrMode_i12)
1661 ImmedOffset = -ImmedOffset;
1662 else
1663 ImmedOffset |= 1 << NumBits;
1664 }
1665 ImmOp.ChangeToImmediate(ImmedOffset);
1666 Offset &= ~(Mask*Scale);
1667 }
1668 }
1669
1670 Offset = (isSub) ?
-Offset : Offset; 1671 return Offset == 0; 1672} 1673 1674bool ARMBaseInstrInfo:: 1675AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, 1676 int &CmpValue) const { 1677 switch (MI->getOpcode()) { 1678 default: break; 1679 case ARM::CMPri: 1680 case ARM::t2CMPri: 1681 SrcReg = MI->getOperand(0).getReg(); 1682 CmpMask = ~0; 1683 CmpValue = MI->getOperand(1).getImm(); 1684 return true; 1685 case ARM::TSTri: 1686 case ARM::t2TSTri: 1687 SrcReg = MI->getOperand(0).getReg(); 1688 CmpMask = MI->getOperand(1).getImm(); 1689 CmpValue = 0; 1690 return true; 1691 } 1692 1693 return false; 1694} 1695 1696/// isSuitableForMask - Identify a suitable 'and' instruction that 1697/// operates on the given source register and applies the same mask 1698/// as a 'tst' instruction. Provide a limited look-through for copies. 1699/// When successful, MI will hold the found instruction. 1700static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, 1701 int CmpMask, bool CommonUse) { 1702 switch (MI->getOpcode()) { 1703 case ARM::ANDri: 1704 case ARM::t2ANDri: 1705 if (CmpMask != MI->getOperand(2).getImm()) 1706 return false; 1707 if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) 1708 return true; 1709 break; 1710 case ARM::COPY: { 1711 // Walk down one instruction which is potentially an 'and'. 1712 const MachineInstr &Copy = *MI; 1713 MachineBasicBlock::iterator AND( 1714 llvm::next(MachineBasicBlock::iterator(MI))); 1715 if (AND == MI->getParent()->end()) return false; 1716 MI = AND; 1717 return isSuitableForMask(MI, Copy.getOperand(0).getReg(), 1718 CmpMask, true); 1719 } 1720 } 1721 1722 return false; 1723} 1724 1725/// OptimizeCompareInstr - Convert the instruction supplying the argument to the 1726/// comparison into one that sets the zero bit in the flags register. 1727bool ARMBaseInstrInfo:: 1728OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, 1729 int CmpValue, const MachineRegisterInfo *MRI) const { 1730 if (CmpValue != 0) 1731 return false; 1732 1733 MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); 1734 if (llvm::next(DI) != MRI->def_end()) 1735 // Only support one definition. 1736 return false; 1737 1738 MachineInstr *MI = &*DI; 1739 1740 // Masked compares sometimes use the same register as the corresponding 'and'. 1741 if (CmpMask != ~0) { 1742 if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { 1743 MI = 0; 1744 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), 1745 UE = MRI->use_end(); UI != UE; ++UI) { 1746 if (UI->getParent() != CmpInstr->getParent()) continue; 1747 MachineInstr *PotentialAND = &*UI; 1748 if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) 1749 continue; 1750 MI = PotentialAND; 1751 break; 1752 } 1753 if (!MI) return false; 1754 } 1755 } 1756 1757 // Conservatively refuse to convert an instruction which isn't in the same BB 1758 // as the comparison. 1759 if (MI->getParent() != CmpInstr->getParent()) 1760 return false; 1761 1762 // Check that CPSR isn't set between the comparison instruction and the one we 1763 // want to change. 1764 MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin(); 1765 1766 // Early exit if CmpInstr is at the beginning of the BB. 
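  // For context, the rewrite being attempted (illustrative registers):
  //   sub  r0, r1, r2     <- MI, becomes subs on success
  //   ...                 <- must neither read nor write CPSR
  //   cmp  r0, #0         <- CmpInstr, erased on success
  // The backwards scan below verifies the middle condition.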
1767   if (I == B) return false;
1768 
1769   --I;
1770   for (; I != E; --I) {
1771     const MachineInstr &Instr = *I;
1772 
1773     for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
1774       const MachineOperand &MO = Instr.getOperand(IO);
1775       if (!MO.isReg()) continue;
1776 
1777       // This instruction modifies or uses CPSR after the one we want to
1778       // change; we can't do this transformation.
1779       if (MO.getReg() == ARM::CPSR)
1780         return false;
1781     }
1782 
1783     if (I == B)
1784       // The 'and' is below the comparison instruction.
1785       return false;
1786   }
1787 
1788   // Set the "zero" bit in CPSR.
1789   switch (MI->getOpcode()) {
1790   default: break;
1791   case ARM::RSBrr:
1792   case ARM::RSBri:
1793   case ARM::RSCrr:
1794   case ARM::RSCri:
1795   case ARM::ADDrr:
1796   case ARM::ADDri:
1797   case ARM::ADCrr:
1798   case ARM::ADCri:
1799   case ARM::SUBrr:
1800   case ARM::SUBri:
1801   case ARM::SBCrr:
1802   case ARM::SBCri:
1803   case ARM::t2RSBri:
1804   case ARM::t2ADDrr:
1805   case ARM::t2ADDri:
1806   case ARM::t2ADCrr:
1807   case ARM::t2ADCri:
1808   case ARM::t2SUBrr:
1809   case ARM::t2SUBri:
1810   case ARM::t2SBCrr:
1811   case ARM::t2SBCri:
1812   case ARM::ANDrr:
1813   case ARM::ANDri:
1814   case ARM::t2ANDrr:
1815   case ARM::t2ANDri:
1816   case ARM::ORRrr:
1817   case ARM::ORRri:
1818   case ARM::t2ORRrr:
1819   case ARM::t2ORRri:
1820   case ARM::EORrr:
1821   case ARM::EORri:
1822   case ARM::t2EORrr:
1823   case ARM::t2EORri: {
1824     // Scan forward for uses of CPSR. If any user's condition code requires
1825     // checking the V (overflow) bit, this transformation is not safe. If the
1826     // CPSR use cannot be found at all (i.e. it is in another block), it is
1827     // also not safe to perform the optimization.
1828     bool isSafe = false;
1829     I = CmpInstr;
1830     E = MI->getParent()->end();
1831     while (!isSafe && ++I != E) {
1832       const MachineInstr &Instr = *I;
1833       for (unsigned IO = 0, EO = Instr.getNumOperands();
1834            !isSafe && IO != EO; ++IO) {
1835         const MachineOperand &MO = Instr.getOperand(IO);
1836         if (!MO.isReg() || MO.getReg() != ARM::CPSR)
1837           continue;
1838         if (MO.isDef()) {
1839           isSafe = true;
1840           break;
1841         }
1842         // The condition code is the operand immediately before CPSR.
1843         ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
1844         switch (CC) {
1845         default:
1846           isSafe = true;
1847           break;
1848         case ARMCC::VS:
1849         case ARMCC::VC:
1850         case ARMCC::GE:
1851         case ARMCC::LT:
1852         case ARMCC::GT:
1853         case ARMCC::LE:
1854           return false;
1855         }
1856       }
1857     }
1858 
1859     if (!isSafe)
1860       return false;
1861 
1862     // Toggle the optional operand to CPSR.
1863     MI->getOperand(5).setReg(ARM::CPSR);
1864     MI->getOperand(5).setIsDef(true);
1865     CmpInstr->eraseFromParent();
1866     return true;
1867   }
1868   }
1869 
1870   return false;
1871 }
1872 
1873 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
1874                                      MachineInstr *DefMI, unsigned Reg,
1875                                      MachineRegisterInfo *MRI) const {
1876   // Fold large immediates into add, sub, or, xor.
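  // Illustrative example (register names and value are arbitrary): if r1 is
  // defined by MOVi32imm #0x00FF00FF, which splits into the two rotatable
  // chunks 0x00FF0000 and 0x000000FF, then a single use
  //   add r0, r2, r1
  // can be rewritten as two immediate adds, one per chunk, e.g.
  //   add rTmp, r2, #0x00FF0000
  //   add r0, rTmp, #0x000000FF
  // and the MOVi32imm erased, since r1 has no other users.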
1877   unsigned DefOpc = DefMI->getOpcode();
1878   if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
1879     return false;
1880   if (!DefMI->getOperand(1).isImm())
1881     // Could be t2MOVi32imm <ga:xx>
1882     return false;
1883 
1884   if (!MRI->hasOneNonDBGUse(Reg))
1885     return false;
1886 
1887   unsigned UseOpc = UseMI->getOpcode();
1888   unsigned NewUseOpc = 0;
1889   uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
1890   uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
1891   bool Commute = false;
1892   switch (UseOpc) {
1893   default: return false;
1894   case ARM::SUBrr:
1895   case ARM::ADDrr:
1896   case ARM::ORRrr:
1897   case ARM::EORrr:
1898   case ARM::t2SUBrr:
1899   case ARM::t2ADDrr:
1900   case ARM::t2ORRrr:
1901   case ARM::t2EORrr: {
1902     Commute = UseMI->getOperand(2).getReg() != Reg;
1903     switch (UseOpc) {
1904     default: break;
1905     case ARM::SUBrr: {
1906       if (Commute)
1907         return false;
1908       ImmVal = -ImmVal;
1909       NewUseOpc = ARM::SUBri;
1910       // Fallthrough
1911     }
1912     case ARM::ADDrr:
1913     case ARM::ORRrr:
1914     case ARM::EORrr: {
1915       if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
1916         return false;
1917       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
1918       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
1919       switch (UseOpc) {
1920       default: break;
1921       case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
1922       case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
1923       case ARM::EORrr: NewUseOpc = ARM::EORri; break;
1924       }
1925       break;
1926     }
1927     case ARM::t2SUBrr: {
1928       if (Commute)
1929         return false;
1930       ImmVal = -ImmVal;
1931       NewUseOpc = ARM::t2SUBri;
1932       // Fallthrough
1933     }
1934     case ARM::t2ADDrr:
1935     case ARM::t2ORRrr:
1936     case ARM::t2EORrr: {
1937       if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
1938         return false;
1939       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
1940       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
1941       switch (UseOpc) {
1942       default: break;
1943       case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
1944       case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
1945       case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
1946       }
1947       break;
1948     }
1949     }
1950   }
1951   }
1952 
1953   unsigned OpIdx = Commute ? 2 : 1;
1954   unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
1955   bool isKill = UseMI->getOperand(OpIdx).isKill();
1956   unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
1957   AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
1958                                       *UseMI, UseMI->getDebugLoc(),
1959                                       get(NewUseOpc), NewReg)
1960                               .addReg(Reg1, getKillRegState(isKill))
1961                               .addImm(SOImmValV1)));
1962   UseMI->setDesc(get(NewUseOpc));
1963   UseMI->getOperand(1).setReg(NewReg);
1964   UseMI->getOperand(1).setIsKill();
1965   UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
1966   DefMI->eraseFromParent();
1967   return true;
1968 }
1969 
1970 unsigned
1971 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
1972                                  const MachineInstr *MI) const {
1973   if (!ItinData || ItinData->isEmpty())
1974     return 1;
1975 
1976   const MCInstrDesc &Desc = MI->getDesc();
1977   unsigned Class = Desc.getSchedClass();
1978   unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
1979   if (UOps)
1980     return UOps;
1981 
1982   unsigned Opc = MI->getOpcode();
1983   switch (Opc) {
1984   default:
1985     llvm_unreachable("Unexpected multi-uops instruction!");
1986     break;
1987   case ARM::VLDMQIA:
1988   case ARM::VSTMQIA:
1989     return 2;
1990 
1991   // The number of uOps for a load / store multiple is determined by the
1992   // number of registers.
1993   //
1994   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
1995   // same cycle. The scheduling for the first load / store must be done
1996   // separately by assuming the address is not 64-bit aligned.
1997   //
1998   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
1999   // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON
2000   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
2001   case ARM::VLDMDIA:
2002   case ARM::VLDMDIA_UPD:
2003   case ARM::VLDMDDB_UPD:
2004   case ARM::VLDMSIA:
2005   case ARM::VLDMSIA_UPD:
2006   case ARM::VLDMSDB_UPD:
2007   case ARM::VSTMDIA:
2008   case ARM::VSTMDIA_UPD:
2009   case ARM::VSTMDDB_UPD:
2010   case ARM::VSTMSIA:
2011   case ARM::VSTMSIA_UPD:
2012   case ARM::VSTMSDB_UPD: {
2013     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
2014     return (NumRegs / 2) + (NumRegs % 2) + 1;
2015   }
2016 
2017   case ARM::LDMIA_RET:
2018   case ARM::LDMIA:
2019   case ARM::LDMDA:
2020   case ARM::LDMDB:
2021   case ARM::LDMIB:
2022   case ARM::LDMIA_UPD:
2023   case ARM::LDMDA_UPD:
2024   case ARM::LDMDB_UPD:
2025   case ARM::LDMIB_UPD:
2026   case ARM::STMIA:
2027   case ARM::STMDA:
2028   case ARM::STMDB:
2029   case ARM::STMIB:
2030   case ARM::STMIA_UPD:
2031   case ARM::STMDA_UPD:
2032   case ARM::STMDB_UPD:
2033   case ARM::STMIB_UPD:
2034   case ARM::tLDMIA:
2035   case ARM::tLDMIA_UPD:
2036   case ARM::tSTMIA_UPD:
2037   case ARM::tPOP_RET:
2038   case ARM::tPOP:
2039   case ARM::tPUSH:
2040   case ARM::t2LDMIA_RET:
2041   case ARM::t2LDMIA:
2042   case ARM::t2LDMDB:
2043   case ARM::t2LDMIA_UPD:
2044   case ARM::t2LDMDB_UPD:
2045   case ARM::t2STMIA:
2046   case ARM::t2STMDB:
2047   case ARM::t2STMIA_UPD:
2048   case ARM::t2STMDB_UPD: {
2049     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
2050     if (Subtarget.isCortexA8()) {
2051       if (NumRegs < 4)
2052         return 2;
2053       // 4 registers would be issued: 2, 2.
2054       // 5 registers would be issued: 2, 2, 1.
2055       UOps = (NumRegs / 2);
2056       if (NumRegs % 2)
2057         ++UOps;
2058       return UOps;
2059     } else if (Subtarget.isCortexA9()) {
2060       UOps = (NumRegs / 2);
2061       // If there are odd number of registers or if it's not 64-bit aligned,
2062       // then it takes an extra AGU (Address Generation Unit) cycle.
2063       if ((NumRegs % 2) ||
2064           !MI->hasOneMemOperand() ||
2065           (*MI->memoperands_begin())->getAlignment() < 8)
2066         ++UOps;
2067       return UOps;
2068     } else {
2069       // Assume the worst.
2070       return NumRegs;
2071     }
2072   }
2073   }
2074 }
2075 
2076 int
2077 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
2078                                   const MCInstrDesc &DefMCID,
2079                                   unsigned DefClass,
2080                                   unsigned DefIdx, unsigned DefAlign) const {
2081   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
2082   if (RegNo <= 0)
2083     // Def is the address writeback.
2084     return ItinData->getOperandCycle(DefClass, DefIdx);
2085 
2086   int DefCycle;
2087   if (Subtarget.isCortexA8()) {
2088     // (regno / 2) + (regno % 2) + 1
2089     DefCycle = RegNo / 2 + 1;
2090     if (RegNo % 2)
2091       ++DefCycle;
2092   } else if (Subtarget.isCortexA9()) {
2093     DefCycle = RegNo;
2094     bool isSLoad = false;
2095 
2096     switch (DefMCID.getOpcode()) {
2097     default: break;
2098     case ARM::VLDMSIA:
2099     case ARM::VLDMSIA_UPD:
2100     case ARM::VLDMSDB_UPD:
2101       isSLoad = true;
2102       break;
2103     }
2104 
2105     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
2106     // then it takes an extra cycle.
2107     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
2108       ++DefCycle;
2109   } else {
2110     // Assume the worst.
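    // With no itinerary information for this core, guess that each register
    // in the list takes a cycle, plus two more cycles of load latency for
    // this def.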
2111 DefCycle = RegNo + 2; 2112 } 2113 2114 return DefCycle; 2115} 2116 2117int 2118ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, 2119 const MCInstrDesc &DefMCID, 2120 unsigned DefClass, 2121 unsigned DefIdx, unsigned DefAlign) const { 2122 int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1; 2123 if (RegNo <= 0) 2124 // Def is the address writeback. 2125 return ItinData->getOperandCycle(DefClass, DefIdx); 2126 2127 int DefCycle; 2128 if (Subtarget.isCortexA8()) { 2129 // 4 registers would be issued: 1, 2, 1. 2130 // 5 registers would be issued: 1, 2, 2. 2131 DefCycle = RegNo / 2; 2132 if (DefCycle < 1) 2133 DefCycle = 1; 2134 // Result latency is issue cycle + 2: E2. 2135 DefCycle += 2; 2136 } else if (Subtarget.isCortexA9()) { 2137 DefCycle = (RegNo / 2); 2138 // If there are odd number of registers or if it's not 64-bit aligned, 2139 // then it takes an extra AGU (Address Generation Unit) cycle. 2140 if ((RegNo % 2) || DefAlign < 8) 2141 ++DefCycle; 2142 // Result latency is AGU cycles + 2. 2143 DefCycle += 2; 2144 } else { 2145 // Assume the worst. 2146 DefCycle = RegNo + 2; 2147 } 2148 2149 return DefCycle; 2150} 2151 2152int 2153ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, 2154 const MCInstrDesc &UseMCID, 2155 unsigned UseClass, 2156 unsigned UseIdx, unsigned UseAlign) const { 2157 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2158 if (RegNo <= 0) 2159 return ItinData->getOperandCycle(UseClass, UseIdx); 2160 2161 int UseCycle; 2162 if (Subtarget.isCortexA8()) { 2163 // (regno / 2) + (regno % 2) + 1 2164 UseCycle = RegNo / 2 + 1; 2165 if (RegNo % 2) 2166 ++UseCycle; 2167 } else if (Subtarget.isCortexA9()) { 2168 UseCycle = RegNo; 2169 bool isSStore = false; 2170 2171 switch (UseMCID.getOpcode()) { 2172 default: break; 2173 case ARM::VSTMSIA: 2174 case ARM::VSTMSIA_UPD: 2175 case ARM::VSTMSDB_UPD: 2176 isSStore = true; 2177 break; 2178 } 2179 2180 // If there are odd number of 'S' registers or if it's not 64-bit aligned, 2181 // then it takes an extra cycle. 2182 if ((isSStore && (RegNo % 2)) || UseAlign < 8) 2183 ++UseCycle; 2184 } else { 2185 // Assume the worst. 2186 UseCycle = RegNo + 2; 2187 } 2188 2189 return UseCycle; 2190} 2191 2192int 2193ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, 2194 const MCInstrDesc &UseMCID, 2195 unsigned UseClass, 2196 unsigned UseIdx, unsigned UseAlign) const { 2197 int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1; 2198 if (RegNo <= 0) 2199 return ItinData->getOperandCycle(UseClass, UseIdx); 2200 2201 int UseCycle; 2202 if (Subtarget.isCortexA8()) { 2203 UseCycle = RegNo / 2; 2204 if (UseCycle < 2) 2205 UseCycle = 2; 2206 // Read in E3. 2207 UseCycle += 2; 2208 } else if (Subtarget.isCortexA9()) { 2209 UseCycle = (RegNo / 2); 2210 // If there are odd number of registers or if it's not 64-bit aligned, 2211 // then it takes an extra AGU (Address Generation Unit) cycle. 2212 if ((RegNo % 2) || UseAlign < 8) 2213 ++UseCycle; 2214 } else { 2215 // Assume the worst. 
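    // With no model for this core, assume the register is read in the very
    // first stage; a smaller use cycle yields a larger, more conservative
    // def-to-use latency when it is combined in getOperandLatency below.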
2216 UseCycle = 1; 2217 } 2218 return UseCycle; 2219} 2220 2221int 2222ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2223 const MCInstrDesc &DefMCID, 2224 unsigned DefIdx, unsigned DefAlign, 2225 const MCInstrDesc &UseMCID, 2226 unsigned UseIdx, unsigned UseAlign) const { 2227 unsigned DefClass = DefMCID.getSchedClass(); 2228 unsigned UseClass = UseMCID.getSchedClass(); 2229 2230 if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands()) 2231 return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); 2232 2233 // This may be a def / use of a variable_ops instruction, the operand 2234 // latency might be determinable dynamically. Let the target try to 2235 // figure it out. 2236 int DefCycle = -1; 2237 bool LdmBypass = false; 2238 switch (DefMCID.getOpcode()) { 2239 default: 2240 DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 2241 break; 2242 2243 case ARM::VLDMDIA: 2244 case ARM::VLDMDIA_UPD: 2245 case ARM::VLDMDDB_UPD: 2246 case ARM::VLDMSIA: 2247 case ARM::VLDMSIA_UPD: 2248 case ARM::VLDMSDB_UPD: 2249 DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 2250 break; 2251 2252 case ARM::LDMIA_RET: 2253 case ARM::LDMIA: 2254 case ARM::LDMDA: 2255 case ARM::LDMDB: 2256 case ARM::LDMIB: 2257 case ARM::LDMIA_UPD: 2258 case ARM::LDMDA_UPD: 2259 case ARM::LDMDB_UPD: 2260 case ARM::LDMIB_UPD: 2261 case ARM::tLDMIA: 2262 case ARM::tLDMIA_UPD: 2263 case ARM::tPUSH: 2264 case ARM::t2LDMIA_RET: 2265 case ARM::t2LDMIA: 2266 case ARM::t2LDMDB: 2267 case ARM::t2LDMIA_UPD: 2268 case ARM::t2LDMDB_UPD: 2269 LdmBypass = 1; 2270 DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign); 2271 break; 2272 } 2273 2274 if (DefCycle == -1) 2275 // We can't seem to determine the result latency of the def, assume it's 2. 2276 DefCycle = 2; 2277 2278 int UseCycle = -1; 2279 switch (UseMCID.getOpcode()) { 2280 default: 2281 UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); 2282 break; 2283 2284 case ARM::VSTMDIA: 2285 case ARM::VSTMDIA_UPD: 2286 case ARM::VSTMDDB_UPD: 2287 case ARM::VSTMSIA: 2288 case ARM::VSTMSIA_UPD: 2289 case ARM::VSTMSDB_UPD: 2290 UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 2291 break; 2292 2293 case ARM::STMIA: 2294 case ARM::STMDA: 2295 case ARM::STMDB: 2296 case ARM::STMIB: 2297 case ARM::STMIA_UPD: 2298 case ARM::STMDA_UPD: 2299 case ARM::STMDB_UPD: 2300 case ARM::STMIB_UPD: 2301 case ARM::tSTMIA_UPD: 2302 case ARM::tPOP_RET: 2303 case ARM::tPOP: 2304 case ARM::t2STMIA: 2305 case ARM::t2STMDB: 2306 case ARM::t2STMIA_UPD: 2307 case ARM::t2STMDB_UPD: 2308 UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign); 2309 break; 2310 } 2311 2312 if (UseCycle == -1) 2313 // Assume it's read in the first stage. 2314 UseCycle = 1; 2315 2316 UseCycle = DefCycle - UseCycle + 1; 2317 if (UseCycle > 0) { 2318 if (LdmBypass) { 2319 // It's a variable_ops instruction so we can't use DefIdx here. Just use 2320 // first def operand. 
2321 if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1, 2322 UseClass, UseIdx)) 2323 --UseCycle; 2324 } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, 2325 UseClass, UseIdx)) { 2326 --UseCycle; 2327 } 2328 } 2329 2330 return UseCycle; 2331} 2332 2333int 2334ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2335 const MachineInstr *DefMI, unsigned DefIdx, 2336 const MachineInstr *UseMI, unsigned UseIdx) const { 2337 if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || 2338 DefMI->isRegSequence() || DefMI->isImplicitDef()) 2339 return 1; 2340 2341 if (!ItinData || ItinData->isEmpty()) 2342 return DefMI->mayLoad() ? 3 : 1; 2343 2344 const MCInstrDesc &DefMCID = DefMI->getDesc(); 2345 const MCInstrDesc &UseMCID = UseMI->getDesc(); 2346 const MachineOperand &DefMO = DefMI->getOperand(DefIdx); 2347 if (DefMO.getReg() == ARM::CPSR) { 2348 if (DefMI->getOpcode() == ARM::FMSTAT) { 2349 // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) 2350 return Subtarget.isCortexA9() ? 1 : 20; 2351 } 2352 2353 // CPSR set and branch can be paired in the same cycle. 2354 if (UseMI->isBranch()) 2355 return 0; 2356 } 2357 2358 unsigned DefAlign = DefMI->hasOneMemOperand() 2359 ? (*DefMI->memoperands_begin())->getAlignment() : 0; 2360 unsigned UseAlign = UseMI->hasOneMemOperand() 2361 ? (*UseMI->memoperands_begin())->getAlignment() : 0; 2362 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 2363 UseMCID, UseIdx, UseAlign); 2364 2365 if (Latency > 1 && 2366 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { 2367 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 2368 // variants are one cycle cheaper. 2369 switch (DefMCID.getOpcode()) { 2370 default: break; 2371 case ARM::LDRrs: 2372 case ARM::LDRBrs: { 2373 unsigned ShOpVal = DefMI->getOperand(3).getImm(); 2374 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2375 if (ShImm == 0 || 2376 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 2377 --Latency; 2378 break; 2379 } 2380 case ARM::t2LDRs: 2381 case ARM::t2LDRBs: 2382 case ARM::t2LDRHs: 2383 case ARM::t2LDRSHs: { 2384 // Thumb2 mode: lsl only. 
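      // Only a shift amount of 0 (plain [r, r]) or 2 ([r, r, lsl #2]) gets the
      // one-cycle discount, mirroring the ARM-mode check above.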
2385 unsigned ShAmt = DefMI->getOperand(3).getImm(); 2386 if (ShAmt == 0 || ShAmt == 2) 2387 --Latency; 2388 break; 2389 } 2390 } 2391 } 2392 2393 if (DefAlign < 8 && Subtarget.isCortexA9()) 2394 switch (DefMCID.getOpcode()) { 2395 default: break; 2396 case ARM::VLD1q8: 2397 case ARM::VLD1q16: 2398 case ARM::VLD1q32: 2399 case ARM::VLD1q64: 2400 case ARM::VLD1q8wb_fixed: 2401 case ARM::VLD1q16wb_fixed: 2402 case ARM::VLD1q32wb_fixed: 2403 case ARM::VLD1q64wb_fixed: 2404 case ARM::VLD1q8wb_register: 2405 case ARM::VLD1q16wb_register: 2406 case ARM::VLD1q32wb_register: 2407 case ARM::VLD1q64wb_register: 2408 case ARM::VLD2d8: 2409 case ARM::VLD2d16: 2410 case ARM::VLD2d32: 2411 case ARM::VLD2q8: 2412 case ARM::VLD2q16: 2413 case ARM::VLD2q32: 2414 case ARM::VLD2d8wb_fixed: 2415 case ARM::VLD2d16wb_fixed: 2416 case ARM::VLD2d32wb_fixed: 2417 case ARM::VLD2q8wb_fixed: 2418 case ARM::VLD2q16wb_fixed: 2419 case ARM::VLD2q32wb_fixed: 2420 case ARM::VLD2d8wb_register: 2421 case ARM::VLD2d16wb_register: 2422 case ARM::VLD2d32wb_register: 2423 case ARM::VLD2q8wb_register: 2424 case ARM::VLD2q16wb_register: 2425 case ARM::VLD2q32wb_register: 2426 case ARM::VLD3d8: 2427 case ARM::VLD3d16: 2428 case ARM::VLD3d32: 2429 case ARM::VLD1d64T: 2430 case ARM::VLD3d8_UPD: 2431 case ARM::VLD3d16_UPD: 2432 case ARM::VLD3d32_UPD: 2433 case ARM::VLD1d64Twb_fixed: 2434 case ARM::VLD1d64Twb_register: 2435 case ARM::VLD3q8_UPD: 2436 case ARM::VLD3q16_UPD: 2437 case ARM::VLD3q32_UPD: 2438 case ARM::VLD4d8: 2439 case ARM::VLD4d16: 2440 case ARM::VLD4d32: 2441 case ARM::VLD1d64Q: 2442 case ARM::VLD4d8_UPD: 2443 case ARM::VLD4d16_UPD: 2444 case ARM::VLD4d32_UPD: 2445 case ARM::VLD1d64Qwb_fixed: 2446 case ARM::VLD1d64Qwb_register: 2447 case ARM::VLD4q8_UPD: 2448 case ARM::VLD4q16_UPD: 2449 case ARM::VLD4q32_UPD: 2450 case ARM::VLD1DUPq8: 2451 case ARM::VLD1DUPq16: 2452 case ARM::VLD1DUPq32: 2453 case ARM::VLD1DUPq8wb_fixed: 2454 case ARM::VLD1DUPq16wb_fixed: 2455 case ARM::VLD1DUPq32wb_fixed: 2456 case ARM::VLD1DUPq8wb_register: 2457 case ARM::VLD1DUPq16wb_register: 2458 case ARM::VLD1DUPq32wb_register: 2459 case ARM::VLD2DUPd8: 2460 case ARM::VLD2DUPd16: 2461 case ARM::VLD2DUPd32: 2462 case ARM::VLD2DUPd8_UPD: 2463 case ARM::VLD2DUPd16_UPD: 2464 case ARM::VLD2DUPd32_UPD: 2465 case ARM::VLD4DUPd8: 2466 case ARM::VLD4DUPd16: 2467 case ARM::VLD4DUPd32: 2468 case ARM::VLD4DUPd8_UPD: 2469 case ARM::VLD4DUPd16_UPD: 2470 case ARM::VLD4DUPd32_UPD: 2471 case ARM::VLD1LNd8: 2472 case ARM::VLD1LNd16: 2473 case ARM::VLD1LNd32: 2474 case ARM::VLD1LNd8_UPD: 2475 case ARM::VLD1LNd16_UPD: 2476 case ARM::VLD1LNd32_UPD: 2477 case ARM::VLD2LNd8: 2478 case ARM::VLD2LNd16: 2479 case ARM::VLD2LNd32: 2480 case ARM::VLD2LNq16: 2481 case ARM::VLD2LNq32: 2482 case ARM::VLD2LNd8_UPD: 2483 case ARM::VLD2LNd16_UPD: 2484 case ARM::VLD2LNd32_UPD: 2485 case ARM::VLD2LNq16_UPD: 2486 case ARM::VLD2LNq32_UPD: 2487 case ARM::VLD4LNd8: 2488 case ARM::VLD4LNd16: 2489 case ARM::VLD4LNd32: 2490 case ARM::VLD4LNq16: 2491 case ARM::VLD4LNq32: 2492 case ARM::VLD4LNd8_UPD: 2493 case ARM::VLD4LNd16_UPD: 2494 case ARM::VLD4LNd32_UPD: 2495 case ARM::VLD4LNq16_UPD: 2496 case ARM::VLD4LNq32_UPD: 2497 // If the address is not 64-bit aligned, the latencies of these 2498 // instructions increases by one. 
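    // (The same one-cycle adjustment is applied again in the SDNode-based
    // overload of getOperandLatency below.)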
2499 ++Latency; 2500 break; 2501 } 2502 2503 return Latency; 2504} 2505 2506int 2507ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, 2508 SDNode *DefNode, unsigned DefIdx, 2509 SDNode *UseNode, unsigned UseIdx) const { 2510 if (!DefNode->isMachineOpcode()) 2511 return 1; 2512 2513 const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode()); 2514 2515 if (isZeroCost(DefMCID.Opcode)) 2516 return 0; 2517 2518 if (!ItinData || ItinData->isEmpty()) 2519 return DefMCID.mayLoad() ? 3 : 1; 2520 2521 if (!UseNode->isMachineOpcode()) { 2522 int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx); 2523 if (Subtarget.isCortexA9()) 2524 return Latency <= 2 ? 1 : Latency - 1; 2525 else 2526 return Latency <= 3 ? 1 : Latency - 2; 2527 } 2528 2529 const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode()); 2530 const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); 2531 unsigned DefAlign = !DefMN->memoperands_empty() 2532 ? (*DefMN->memoperands_begin())->getAlignment() : 0; 2533 const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); 2534 unsigned UseAlign = !UseMN->memoperands_empty() 2535 ? (*UseMN->memoperands_begin())->getAlignment() : 0; 2536 int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign, 2537 UseMCID, UseIdx, UseAlign); 2538 2539 if (Latency > 1 && 2540 (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { 2541 // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] 2542 // variants are one cycle cheaper. 2543 switch (DefMCID.getOpcode()) { 2544 default: break; 2545 case ARM::LDRrs: 2546 case ARM::LDRBrs: { 2547 unsigned ShOpVal = 2548 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 2549 unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); 2550 if (ShImm == 0 || 2551 (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) 2552 --Latency; 2553 break; 2554 } 2555 case ARM::t2LDRs: 2556 case ARM::t2LDRBs: 2557 case ARM::t2LDRHs: 2558 case ARM::t2LDRSHs: { 2559 // Thumb2 mode: lsl only. 
2560 unsigned ShAmt = 2561 cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); 2562 if (ShAmt == 0 || ShAmt == 2) 2563 --Latency; 2564 break; 2565 } 2566 } 2567 } 2568 2569 if (DefAlign < 8 && Subtarget.isCortexA9()) 2570 switch (DefMCID.getOpcode()) { 2571 default: break; 2572 case ARM::VLD1q8Pseudo: 2573 case ARM::VLD1q16Pseudo: 2574 case ARM::VLD1q32Pseudo: 2575 case ARM::VLD1q64Pseudo: 2576 case ARM::VLD1q8PseudoWB_register: 2577 case ARM::VLD1q16PseudoWB_register: 2578 case ARM::VLD1q32PseudoWB_register: 2579 case ARM::VLD1q64PseudoWB_register: 2580 case ARM::VLD1q8PseudoWB_fixed: 2581 case ARM::VLD1q16PseudoWB_fixed: 2582 case ARM::VLD1q32PseudoWB_fixed: 2583 case ARM::VLD1q64PseudoWB_fixed: 2584 case ARM::VLD2d8Pseudo: 2585 case ARM::VLD2d16Pseudo: 2586 case ARM::VLD2d32Pseudo: 2587 case ARM::VLD2q8Pseudo: 2588 case ARM::VLD2q16Pseudo: 2589 case ARM::VLD2q32Pseudo: 2590 case ARM::VLD2d8PseudoWB_fixed: 2591 case ARM::VLD2d16PseudoWB_fixed: 2592 case ARM::VLD2d32PseudoWB_fixed: 2593 case ARM::VLD2q8PseudoWB_fixed: 2594 case ARM::VLD2q16PseudoWB_fixed: 2595 case ARM::VLD2q32PseudoWB_fixed: 2596 case ARM::VLD2d8PseudoWB_register: 2597 case ARM::VLD2d16PseudoWB_register: 2598 case ARM::VLD2d32PseudoWB_register: 2599 case ARM::VLD2q8PseudoWB_register: 2600 case ARM::VLD2q16PseudoWB_register: 2601 case ARM::VLD2q32PseudoWB_register: 2602 case ARM::VLD3d8Pseudo: 2603 case ARM::VLD3d16Pseudo: 2604 case ARM::VLD3d32Pseudo: 2605 case ARM::VLD1d64TPseudo: 2606 case ARM::VLD3d8Pseudo_UPD: 2607 case ARM::VLD3d16Pseudo_UPD: 2608 case ARM::VLD3d32Pseudo_UPD: 2609 case ARM::VLD3q8Pseudo_UPD: 2610 case ARM::VLD3q16Pseudo_UPD: 2611 case ARM::VLD3q32Pseudo_UPD: 2612 case ARM::VLD3q8oddPseudo: 2613 case ARM::VLD3q16oddPseudo: 2614 case ARM::VLD3q32oddPseudo: 2615 case ARM::VLD3q8oddPseudo_UPD: 2616 case ARM::VLD3q16oddPseudo_UPD: 2617 case ARM::VLD3q32oddPseudo_UPD: 2618 case ARM::VLD4d8Pseudo: 2619 case ARM::VLD4d16Pseudo: 2620 case ARM::VLD4d32Pseudo: 2621 case ARM::VLD1d64QPseudo: 2622 case ARM::VLD4d8Pseudo_UPD: 2623 case ARM::VLD4d16Pseudo_UPD: 2624 case ARM::VLD4d32Pseudo_UPD: 2625 case ARM::VLD4q8Pseudo_UPD: 2626 case ARM::VLD4q16Pseudo_UPD: 2627 case ARM::VLD4q32Pseudo_UPD: 2628 case ARM::VLD4q8oddPseudo: 2629 case ARM::VLD4q16oddPseudo: 2630 case ARM::VLD4q32oddPseudo: 2631 case ARM::VLD4q8oddPseudo_UPD: 2632 case ARM::VLD4q16oddPseudo_UPD: 2633 case ARM::VLD4q32oddPseudo_UPD: 2634 case ARM::VLD1DUPq8Pseudo: 2635 case ARM::VLD1DUPq16Pseudo: 2636 case ARM::VLD1DUPq32Pseudo: 2637 case ARM::VLD1DUPq8PseudoWB_fixed: 2638 case ARM::VLD1DUPq16PseudoWB_fixed: 2639 case ARM::VLD1DUPq32PseudoWB_fixed: 2640 case ARM::VLD1DUPq8PseudoWB_register: 2641 case ARM::VLD1DUPq16PseudoWB_register: 2642 case ARM::VLD1DUPq32PseudoWB_register: 2643 case ARM::VLD2DUPd8Pseudo: 2644 case ARM::VLD2DUPd16Pseudo: 2645 case ARM::VLD2DUPd32Pseudo: 2646 case ARM::VLD2DUPd8Pseudo_UPD: 2647 case ARM::VLD2DUPd16Pseudo_UPD: 2648 case ARM::VLD2DUPd32Pseudo_UPD: 2649 case ARM::VLD4DUPd8Pseudo: 2650 case ARM::VLD4DUPd16Pseudo: 2651 case ARM::VLD4DUPd32Pseudo: 2652 case ARM::VLD4DUPd8Pseudo_UPD: 2653 case ARM::VLD4DUPd16Pseudo_UPD: 2654 case ARM::VLD4DUPd32Pseudo_UPD: 2655 case ARM::VLD1LNq8Pseudo: 2656 case ARM::VLD1LNq16Pseudo: 2657 case ARM::VLD1LNq32Pseudo: 2658 case ARM::VLD1LNq8Pseudo_UPD: 2659 case ARM::VLD1LNq16Pseudo_UPD: 2660 case ARM::VLD1LNq32Pseudo_UPD: 2661 case ARM::VLD2LNd8Pseudo: 2662 case ARM::VLD2LNd16Pseudo: 2663 case ARM::VLD2LNd32Pseudo: 2664 case ARM::VLD2LNq16Pseudo: 2665 case ARM::VLD2LNq32Pseudo: 
2666 case ARM::VLD2LNd8Pseudo_UPD: 2667 case ARM::VLD2LNd16Pseudo_UPD: 2668 case ARM::VLD2LNd32Pseudo_UPD: 2669 case ARM::VLD2LNq16Pseudo_UPD: 2670 case ARM::VLD2LNq32Pseudo_UPD: 2671 case ARM::VLD4LNd8Pseudo: 2672 case ARM::VLD4LNd16Pseudo: 2673 case ARM::VLD4LNd32Pseudo: 2674 case ARM::VLD4LNq16Pseudo: 2675 case ARM::VLD4LNq32Pseudo: 2676 case ARM::VLD4LNd8Pseudo_UPD: 2677 case ARM::VLD4LNd16Pseudo_UPD: 2678 case ARM::VLD4LNd32Pseudo_UPD: 2679 case ARM::VLD4LNq16Pseudo_UPD: 2680 case ARM::VLD4LNq32Pseudo_UPD: 2681 // If the address is not 64-bit aligned, the latencies of these 2682 // instructions increases by one. 2683 ++Latency; 2684 break; 2685 } 2686 2687 return Latency; 2688} 2689 2690int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 2691 const MachineInstr *MI, 2692 unsigned *PredCost) const { 2693 if (MI->isCopyLike() || MI->isInsertSubreg() || 2694 MI->isRegSequence() || MI->isImplicitDef()) 2695 return 1; 2696 2697 if (!ItinData || ItinData->isEmpty()) 2698 return 1; 2699 2700 const MCInstrDesc &MCID = MI->getDesc(); 2701 unsigned Class = MCID.getSchedClass(); 2702 unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; 2703 if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) 2704 // When predicated, CPSR is an additional source operand for CPSR updating 2705 // instructions, this apparently increases their latencies. 2706 *PredCost = 1; 2707 if (UOps) 2708 return ItinData->getStageLatency(Class); 2709 return getNumMicroOps(ItinData, MI); 2710} 2711 2712int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 2713 SDNode *Node) const { 2714 if (!Node->isMachineOpcode()) 2715 return 1; 2716 2717 if (!ItinData || ItinData->isEmpty()) 2718 return 1; 2719 2720 unsigned Opcode = Node->getMachineOpcode(); 2721 switch (Opcode) { 2722 default: 2723 return ItinData->getStageLatency(get(Opcode).getSchedClass()); 2724 case ARM::VLDMQIA: 2725 case ARM::VSTMQIA: 2726 return 2; 2727 } 2728} 2729 2730bool ARMBaseInstrInfo:: 2731hasHighOperandLatency(const InstrItineraryData *ItinData, 2732 const MachineRegisterInfo *MRI, 2733 const MachineInstr *DefMI, unsigned DefIdx, 2734 const MachineInstr *UseMI, unsigned UseIdx) const { 2735 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 2736 unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; 2737 if (Subtarget.isCortexA8() && 2738 (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) 2739 // CortexA8 VFP instructions are not pipelined. 2740 return true; 2741 2742 // Hoist VFP / NEON instructions with 4 or higher latency. 
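  // Operand latencies of 3 cycles or less are treated as cheap and do not, by
  // themselves, make the instruction worth hoisting.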
2743 int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); 2744 if (Latency <= 3) 2745 return false; 2746 return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || 2747 UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; 2748} 2749 2750bool ARMBaseInstrInfo:: 2751hasLowDefLatency(const InstrItineraryData *ItinData, 2752 const MachineInstr *DefMI, unsigned DefIdx) const { 2753 if (!ItinData || ItinData->isEmpty()) 2754 return false; 2755 2756 unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; 2757 if (DDomain == ARMII::DomainGeneral) { 2758 unsigned DefClass = DefMI->getDesc().getSchedClass(); 2759 int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); 2760 return (DefCycle != -1 && DefCycle <= 2); 2761 } 2762 return false; 2763} 2764 2765bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, 2766 StringRef &ErrInfo) const { 2767 if (convertAddSubFlagsOpcode(MI->getOpcode())) { 2768 ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG"; 2769 return false; 2770 } 2771 return true; 2772} 2773 2774bool 2775ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, 2776 unsigned &AddSubOpc, 2777 bool &NegAcc, bool &HasLane) const { 2778 DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); 2779 if (I == MLxEntryMap.end()) 2780 return false; 2781 2782 const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; 2783 MulOpc = Entry.MulOpc; 2784 AddSubOpc = Entry.AddSubOpc; 2785 NegAcc = Entry.NegAcc; 2786 HasLane = Entry.HasLane; 2787 return true; 2788} 2789 2790//===----------------------------------------------------------------------===// 2791// Execution domains. 2792//===----------------------------------------------------------------------===// 2793// 2794// Some instructions go down the NEON pipeline, some go down the VFP pipeline, 2795// and some can go down both. The vmov instructions go down the VFP pipeline, 2796// but they can be changed to vorr equivalents that are executed by the NEON 2797// pipeline. 2798// 2799// We use the following execution domain numbering: 2800// 2801enum ARMExeDomain { 2802 ExeGeneric = 0, 2803 ExeVFP = 1, 2804 ExeNEON = 2 2805}; 2806// 2807// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h 2808// 2809std::pair<uint16_t, uint16_t> 2810ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { 2811 // VMOVD is a VFP instruction, but can be changed to NEON if it isn't 2812 // predicated. 2813 if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) 2814 return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); 2815 2816 // No other instructions can be swizzled, so just determine their domain. 2817 unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; 2818 2819 if (Domain & ARMII::DomainNEON) 2820 return std::make_pair(ExeNEON, 0); 2821 2822 // Certain instructions can go either way on Cortex-A8. 2823 // Treat them as NEON instructions. 2824 if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8()) 2825 return std::make_pair(ExeNEON, 0); 2826 2827 if (Domain & ARMII::DomainVFP) 2828 return std::make_pair(ExeVFP, 0); 2829 2830 return std::make_pair(ExeGeneric, 0); 2831} 2832 2833void 2834ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { 2835 // We only know how to change VMOVD into VORR. 2836 assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); 2837 if (Domain != ExeNEON) 2838 return; 2839 2840 // Zap the predicate operands. 
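  // VMOVD's operands are (dst, src, pred, pred-reg); removing operands 3 and 2
  // strips the predicate so fresh default predicate operands can be appended
  // after the opcode change. The net effect, in rough assembly terms, is
  //   vmov.f64 d0, d1   -->   vorr d0, d1, d1
  // which executes in the NEON domain rather than the VFP domain.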
2841 assert(!isPredicated(MI) && "Cannot predicate a VORRd"); 2842 MI->RemoveOperand(3); 2843 MI->RemoveOperand(2); 2844 2845 // Change to a VORRd which requires two identical use operands. 2846 MI->setDesc(get(ARM::VORRd)); 2847 2848 // Add the extra source operand and new predicates. 2849 // This will go before any implicit ops. 2850 AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); 2851} 2852