1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains the AArch64 implementation of the TargetInstrInfo class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "AArch64InstrInfo.h" 15#include "AArch64Subtarget.h" 16#include "MCTargetDesc/AArch64AddressingModes.h" 17#include "llvm/CodeGen/MachineFrameInfo.h" 18#include "llvm/CodeGen/MachineInstrBuilder.h" 19#include "llvm/CodeGen/MachineMemOperand.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/PseudoSourceValue.h" 22#include "llvm/MC/MCInst.h" 23#include "llvm/Support/ErrorHandling.h" 24#include "llvm/Support/TargetRegistry.h" 25 26using namespace llvm; 27 28#define GET_INSTRINFO_CTOR_DTOR 29#include "AArch64GenInstrInfo.inc" 30 31AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) 32 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), 33 RI(this, &STI), Subtarget(STI) {} 34 35/// GetInstSize - Return the number of bytes of code the specified 36/// instruction may be. This returns the maximum number of bytes. 37unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 38 const MachineBasicBlock &MBB = *MI->getParent(); 39 const MachineFunction *MF = MBB.getParent(); 40 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); 41 42 if (MI->getOpcode() == AArch64::INLINEASM) 43 return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); 44 45 const MCInstrDesc &Desc = MI->getDesc(); 46 switch (Desc.getOpcode()) { 47 default: 48 // Anything not explicitly designated otherwise is a nomal 4-byte insn. 49 return 4; 50 case TargetOpcode::DBG_VALUE: 51 case TargetOpcode::EH_LABEL: 52 case TargetOpcode::IMPLICIT_DEF: 53 case TargetOpcode::KILL: 54 return 0; 55 } 56 57 llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size"); 58} 59 60static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, 61 SmallVectorImpl<MachineOperand> &Cond) { 62 // Block ends with fall-through condbranch. 63 switch (LastInst->getOpcode()) { 64 default: 65 llvm_unreachable("Unknown branch instruction?"); 66 case AArch64::Bcc: 67 Target = LastInst->getOperand(1).getMBB(); 68 Cond.push_back(LastInst->getOperand(0)); 69 break; 70 case AArch64::CBZW: 71 case AArch64::CBZX: 72 case AArch64::CBNZW: 73 case AArch64::CBNZX: 74 Target = LastInst->getOperand(1).getMBB(); 75 Cond.push_back(MachineOperand::CreateImm(-1)); 76 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 77 Cond.push_back(LastInst->getOperand(0)); 78 break; 79 case AArch64::TBZW: 80 case AArch64::TBZX: 81 case AArch64::TBNZW: 82 case AArch64::TBNZX: 83 Target = LastInst->getOperand(2).getMBB(); 84 Cond.push_back(MachineOperand::CreateImm(-1)); 85 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); 86 Cond.push_back(LastInst->getOperand(0)); 87 Cond.push_back(LastInst->getOperand(1)); 88 } 89} 90 91// Branch analysis. 92bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 93 MachineBasicBlock *&TBB, 94 MachineBasicBlock *&FBB, 95 SmallVectorImpl<MachineOperand> &Cond, 96 bool AllowModify) const { 97 // If the block has no terminators, it just falls into the block after it. 98 MachineBasicBlock::iterator I = MBB.end(); 99 if (I == MBB.begin()) 100 return false; 101 --I; 102 while (I->isDebugValue()) { 103 if (I == MBB.begin()) 104 return false; 105 --I; 106 } 107 if (!isUnpredicatedTerminator(I)) 108 return false; 109 110 // Get the last instruction in the block. 111 MachineInstr *LastInst = I; 112 113 // If there is only one terminator instruction, process it. 114 unsigned LastOpc = LastInst->getOpcode(); 115 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 116 if (isUncondBranchOpcode(LastOpc)) { 117 TBB = LastInst->getOperand(0).getMBB(); 118 return false; 119 } 120 if (isCondBranchOpcode(LastOpc)) { 121 // Block ends with fall-through condbranch. 122 parseCondBranch(LastInst, TBB, Cond); 123 return false; 124 } 125 return true; // Can't handle indirect branch. 126 } 127 128 // Get the instruction before it if it is a terminator. 129 MachineInstr *SecondLastInst = I; 130 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 131 132 // If AllowModify is true and the block ends with two or more unconditional 133 // branches, delete all but the first unconditional branch. 134 if (AllowModify && isUncondBranchOpcode(LastOpc)) { 135 while (isUncondBranchOpcode(SecondLastOpc)) { 136 LastInst->eraseFromParent(); 137 LastInst = SecondLastInst; 138 LastOpc = LastInst->getOpcode(); 139 if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { 140 // Return now the only terminator is an unconditional branch. 141 TBB = LastInst->getOperand(0).getMBB(); 142 return false; 143 } else { 144 SecondLastInst = I; 145 SecondLastOpc = SecondLastInst->getOpcode(); 146 } 147 } 148 } 149 150 // If there are three terminators, we don't know what sort of block this is. 151 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) 152 return true; 153 154 // If the block ends with a B and a Bcc, handle it. 155 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 156 parseCondBranch(SecondLastInst, TBB, Cond); 157 FBB = LastInst->getOperand(0).getMBB(); 158 return false; 159 } 160 161 // If the block ends with two unconditional branches, handle it. The second 162 // one is not executed, so remove it. 163 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 164 TBB = SecondLastInst->getOperand(0).getMBB(); 165 I = LastInst; 166 if (AllowModify) 167 I->eraseFromParent(); 168 return false; 169 } 170 171 // ...likewise if it ends with an indirect branch followed by an unconditional 172 // branch. 173 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { 174 I = LastInst; 175 if (AllowModify) 176 I->eraseFromParent(); 177 return true; 178 } 179 180 // Otherwise, can't handle this. 181 return true; 182} 183 184bool AArch64InstrInfo::ReverseBranchCondition( 185 SmallVectorImpl<MachineOperand> &Cond) const { 186 if (Cond[0].getImm() != -1) { 187 // Regular Bcc 188 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm(); 189 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC)); 190 } else { 191 // Folded compare-and-branch 192 switch (Cond[1].getImm()) { 193 default: 194 llvm_unreachable("Unknown conditional branch!"); 195 case AArch64::CBZW: 196 Cond[1].setImm(AArch64::CBNZW); 197 break; 198 case AArch64::CBNZW: 199 Cond[1].setImm(AArch64::CBZW); 200 break; 201 case AArch64::CBZX: 202 Cond[1].setImm(AArch64::CBNZX); 203 break; 204 case AArch64::CBNZX: 205 Cond[1].setImm(AArch64::CBZX); 206 break; 207 case AArch64::TBZW: 208 Cond[1].setImm(AArch64::TBNZW); 209 break; 210 case AArch64::TBNZW: 211 Cond[1].setImm(AArch64::TBZW); 212 break; 213 case AArch64::TBZX: 214 Cond[1].setImm(AArch64::TBNZX); 215 break; 216 case AArch64::TBNZX: 217 Cond[1].setImm(AArch64::TBZX); 218 break; 219 } 220 } 221 222 return false; 223} 224 225unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 226 MachineBasicBlock::iterator I = MBB.end(); 227 if (I == MBB.begin()) 228 return 0; 229 --I; 230 while (I->isDebugValue()) { 231 if (I == MBB.begin()) 232 return 0; 233 --I; 234 } 235 if (!isUncondBranchOpcode(I->getOpcode()) && 236 !isCondBranchOpcode(I->getOpcode())) 237 return 0; 238 239 // Remove the branch. 240 I->eraseFromParent(); 241 242 I = MBB.end(); 243 244 if (I == MBB.begin()) 245 return 1; 246 --I; 247 if (!isCondBranchOpcode(I->getOpcode())) 248 return 1; 249 250 // Remove the branch. 251 I->eraseFromParent(); 252 return 2; 253} 254 255void AArch64InstrInfo::instantiateCondBranch( 256 MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, 257 const SmallVectorImpl<MachineOperand> &Cond) const { 258 if (Cond[0].getImm() != -1) { 259 // Regular Bcc 260 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); 261 } else { 262 // Folded compare-and-branch 263 const MachineInstrBuilder MIB = 264 BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg()); 265 if (Cond.size() > 3) 266 MIB.addImm(Cond[3].getImm()); 267 MIB.addMBB(TBB); 268 } 269} 270 271unsigned AArch64InstrInfo::InsertBranch( 272 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, 273 const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { 274 // Shouldn't be a fall through. 275 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 276 277 if (!FBB) { 278 if (Cond.empty()) // Unconditional branch? 279 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB); 280 else 281 instantiateCondBranch(MBB, DL, TBB, Cond); 282 return 1; 283 } 284 285 // Two-way conditional branch. 286 instantiateCondBranch(MBB, DL, TBB, Cond); 287 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB); 288 return 2; 289} 290 291// Find the original register that VReg is copied from. 292static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { 293 while (TargetRegisterInfo::isVirtualRegister(VReg)) { 294 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 295 if (!DefMI->isFullCopy()) 296 return VReg; 297 VReg = DefMI->getOperand(1).getReg(); 298 } 299 return VReg; 300} 301 302// Determine if VReg is defined by an instruction that can be folded into a 303// csel instruction. If so, return the folded opcode, and the replacement 304// register. 305static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, 306 unsigned *NewVReg = nullptr) { 307 VReg = removeCopies(MRI, VReg); 308 if (!TargetRegisterInfo::isVirtualRegister(VReg)) 309 return 0; 310 311 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); 312 const MachineInstr *DefMI = MRI.getVRegDef(VReg); 313 unsigned Opc = 0; 314 unsigned SrcOpNum = 0; 315 switch (DefMI->getOpcode()) { 316 case AArch64::ADDSXri: 317 case AArch64::ADDSWri: 318 // if NZCV is used, do not fold. 319 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 320 return 0; 321 // fall-through to ADDXri and ADDWri. 322 case AArch64::ADDXri: 323 case AArch64::ADDWri: 324 // add x, 1 -> csinc. 325 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || 326 DefMI->getOperand(3).getImm() != 0) 327 return 0; 328 SrcOpNum = 1; 329 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr; 330 break; 331 332 case AArch64::ORNXrr: 333 case AArch64::ORNWrr: { 334 // not x -> csinv, represented as orn dst, xzr, src. 335 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 336 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 337 return 0; 338 SrcOpNum = 2; 339 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr; 340 break; 341 } 342 343 case AArch64::SUBSXrr: 344 case AArch64::SUBSWrr: 345 // if NZCV is used, do not fold. 346 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1) 347 return 0; 348 // fall-through to SUBXrr and SUBWrr. 349 case AArch64::SUBXrr: 350 case AArch64::SUBWrr: { 351 // neg x -> csneg, represented as sub dst, xzr, src. 352 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); 353 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR) 354 return 0; 355 SrcOpNum = 2; 356 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr; 357 break; 358 } 359 default: 360 return 0; 361 } 362 assert(Opc && SrcOpNum && "Missing parameters"); 363 364 if (NewVReg) 365 *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); 366 return Opc; 367} 368 369bool AArch64InstrInfo::canInsertSelect( 370 const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, 371 unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, 372 int &FalseCycles) const { 373 // Check register classes. 374 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 375 const TargetRegisterClass *RC = 376 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); 377 if (!RC) 378 return false; 379 380 // Expanding cbz/tbz requires an extra cycle of latency on the condition. 381 unsigned ExtraCondLat = Cond.size() != 1; 382 383 // GPRs are handled by csel. 384 // FIXME: Fold in x+1, -x, and ~x when applicable. 385 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) || 386 AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 387 // Single-cycle csel, csinc, csinv, and csneg. 388 CondCycles = 1 + ExtraCondLat; 389 TrueCycles = FalseCycles = 1; 390 if (canFoldIntoCSel(MRI, TrueReg)) 391 TrueCycles = 0; 392 else if (canFoldIntoCSel(MRI, FalseReg)) 393 FalseCycles = 0; 394 return true; 395 } 396 397 // Scalar floating point is handled by fcsel. 398 // FIXME: Form fabs, fmin, and fmax when applicable. 399 if (AArch64::FPR64RegClass.hasSubClassEq(RC) || 400 AArch64::FPR32RegClass.hasSubClassEq(RC)) { 401 CondCycles = 5 + ExtraCondLat; 402 TrueCycles = FalseCycles = 2; 403 return true; 404 } 405 406 // Can't do vectors. 407 return false; 408} 409 410void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB, 411 MachineBasicBlock::iterator I, DebugLoc DL, 412 unsigned DstReg, 413 const SmallVectorImpl<MachineOperand> &Cond, 414 unsigned TrueReg, unsigned FalseReg) const { 415 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 416 417 // Parse the condition code, see parseCondBranch() above. 418 AArch64CC::CondCode CC; 419 switch (Cond.size()) { 420 default: 421 llvm_unreachable("Unknown condition opcode in Cond"); 422 case 1: // b.cc 423 CC = AArch64CC::CondCode(Cond[0].getImm()); 424 break; 425 case 3: { // cbz/cbnz 426 // We must insert a compare against 0. 427 bool Is64Bit; 428 switch (Cond[1].getImm()) { 429 default: 430 llvm_unreachable("Unknown branch opcode in Cond"); 431 case AArch64::CBZW: 432 Is64Bit = 0; 433 CC = AArch64CC::EQ; 434 break; 435 case AArch64::CBZX: 436 Is64Bit = 1; 437 CC = AArch64CC::EQ; 438 break; 439 case AArch64::CBNZW: 440 Is64Bit = 0; 441 CC = AArch64CC::NE; 442 break; 443 case AArch64::CBNZX: 444 Is64Bit = 1; 445 CC = AArch64CC::NE; 446 break; 447 } 448 unsigned SrcReg = Cond[2].getReg(); 449 if (Is64Bit) { 450 // cmp reg, #0 is actually subs xzr, reg, #0. 451 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass); 452 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR) 453 .addReg(SrcReg) 454 .addImm(0) 455 .addImm(0); 456 } else { 457 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass); 458 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR) 459 .addReg(SrcReg) 460 .addImm(0) 461 .addImm(0); 462 } 463 break; 464 } 465 case 4: { // tbz/tbnz 466 // We must insert a tst instruction. 467 switch (Cond[1].getImm()) { 468 default: 469 llvm_unreachable("Unknown branch opcode in Cond"); 470 case AArch64::TBZW: 471 case AArch64::TBZX: 472 CC = AArch64CC::EQ; 473 break; 474 case AArch64::TBNZW: 475 case AArch64::TBNZX: 476 CC = AArch64CC::NE; 477 break; 478 } 479 // cmp reg, #foo is actually ands xzr, reg, #1<<foo. 480 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW) 481 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR) 482 .addReg(Cond[2].getReg()) 483 .addImm( 484 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32)); 485 else 486 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR) 487 .addReg(Cond[2].getReg()) 488 .addImm( 489 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64)); 490 break; 491 } 492 } 493 494 unsigned Opc = 0; 495 const TargetRegisterClass *RC = nullptr; 496 bool TryFold = false; 497 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) { 498 RC = &AArch64::GPR64RegClass; 499 Opc = AArch64::CSELXr; 500 TryFold = true; 501 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) { 502 RC = &AArch64::GPR32RegClass; 503 Opc = AArch64::CSELWr; 504 TryFold = true; 505 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) { 506 RC = &AArch64::FPR64RegClass; 507 Opc = AArch64::FCSELDrrr; 508 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) { 509 RC = &AArch64::FPR32RegClass; 510 Opc = AArch64::FCSELSrrr; 511 } 512 assert(RC && "Unsupported regclass"); 513 514 // Try folding simple instructions into the csel. 515 if (TryFold) { 516 unsigned NewVReg = 0; 517 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg); 518 if (FoldedOpc) { 519 // The folded opcodes csinc, csinc and csneg apply the operation to 520 // FalseReg, so we need to invert the condition. 521 CC = AArch64CC::getInvertedCondCode(CC); 522 TrueReg = FalseReg; 523 } else 524 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg); 525 526 // Fold the operation. Leave any dead instructions for DCE to clean up. 527 if (FoldedOpc) { 528 FalseReg = NewVReg; 529 Opc = FoldedOpc; 530 // The extends the live range of NewVReg. 531 MRI.clearKillFlags(NewVReg); 532 } 533 } 534 535 // Pull all virtual register into the appropriate class. 536 MRI.constrainRegClass(TrueReg, RC); 537 MRI.constrainRegClass(FalseReg, RC); 538 539 // Insert the csel. 540 BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm( 541 CC); 542} 543 544// FIXME: this implementation should be micro-architecture dependent, so a 545// micro-architecture target hook should be introduced here in future. 546bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { 547 if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53()) 548 return MI->isAsCheapAsAMove(); 549 550 switch (MI->getOpcode()) { 551 default: 552 return false; 553 554 // add/sub on register without shift 555 case AArch64::ADDWri: 556 case AArch64::ADDXri: 557 case AArch64::SUBWri: 558 case AArch64::SUBXri: 559 return (MI->getOperand(3).getImm() == 0); 560 561 // logical ops on immediate 562 case AArch64::ANDWri: 563 case AArch64::ANDXri: 564 case AArch64::EORWri: 565 case AArch64::EORXri: 566 case AArch64::ORRWri: 567 case AArch64::ORRXri: 568 return true; 569 570 // logical ops on register without shift 571 case AArch64::ANDWrr: 572 case AArch64::ANDXrr: 573 case AArch64::BICWrr: 574 case AArch64::BICXrr: 575 case AArch64::EONWrr: 576 case AArch64::EONXrr: 577 case AArch64::EORWrr: 578 case AArch64::EORXrr: 579 case AArch64::ORNWrr: 580 case AArch64::ORNXrr: 581 case AArch64::ORRWrr: 582 case AArch64::ORRXrr: 583 return true; 584 } 585 586 llvm_unreachable("Unknown opcode to check as cheap as a move!"); 587} 588 589bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, 590 unsigned &SrcReg, unsigned &DstReg, 591 unsigned &SubIdx) const { 592 switch (MI.getOpcode()) { 593 default: 594 return false; 595 case AArch64::SBFMXri: // aka sxtw 596 case AArch64::UBFMXri: // aka uxtw 597 // Check for the 32 -> 64 bit extension case, these instructions can do 598 // much more. 599 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) 600 return false; 601 // This is a signed or unsigned 32 -> 64 bit extension. 602 SrcReg = MI.getOperand(1).getReg(); 603 DstReg = MI.getOperand(0).getReg(); 604 SubIdx = AArch64::sub_32; 605 return true; 606 } 607} 608 609/// analyzeCompare - For a comparison instruction, return the source registers 610/// in SrcReg and SrcReg2, and the value it compares against in CmpValue. 611/// Return true if the comparison instruction can be analyzed. 612bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, 613 unsigned &SrcReg2, int &CmpMask, 614 int &CmpValue) const { 615 switch (MI->getOpcode()) { 616 default: 617 break; 618 case AArch64::SUBSWrr: 619 case AArch64::SUBSWrs: 620 case AArch64::SUBSWrx: 621 case AArch64::SUBSXrr: 622 case AArch64::SUBSXrs: 623 case AArch64::SUBSXrx: 624 case AArch64::ADDSWrr: 625 case AArch64::ADDSWrs: 626 case AArch64::ADDSWrx: 627 case AArch64::ADDSXrr: 628 case AArch64::ADDSXrs: 629 case AArch64::ADDSXrx: 630 // Replace SUBSWrr with SUBWrr if NZCV is not used. 631 SrcReg = MI->getOperand(1).getReg(); 632 SrcReg2 = MI->getOperand(2).getReg(); 633 CmpMask = ~0; 634 CmpValue = 0; 635 return true; 636 case AArch64::SUBSWri: 637 case AArch64::ADDSWri: 638 case AArch64::SUBSXri: 639 case AArch64::ADDSXri: 640 SrcReg = MI->getOperand(1).getReg(); 641 SrcReg2 = 0; 642 CmpMask = ~0; 643 CmpValue = MI->getOperand(2).getImm(); 644 return true; 645 case AArch64::ANDSWri: 646 case AArch64::ANDSXri: 647 // ANDS does not use the same encoding scheme as the others xxxS 648 // instructions. 649 SrcReg = MI->getOperand(1).getReg(); 650 SrcReg2 = 0; 651 CmpMask = ~0; 652 CmpValue = AArch64_AM::decodeLogicalImmediate( 653 MI->getOperand(2).getImm(), 654 MI->getOpcode() == AArch64::ANDSWri ? 32 : 64); 655 return true; 656 } 657 658 return false; 659} 660 661static bool UpdateOperandRegClass(MachineInstr *Instr) { 662 MachineBasicBlock *MBB = Instr->getParent(); 663 assert(MBB && "Can't get MachineBasicBlock here"); 664 MachineFunction *MF = MBB->getParent(); 665 assert(MF && "Can't get MachineFunction here"); 666 const TargetMachine *TM = &MF->getTarget(); 667 const TargetInstrInfo *TII = TM->getInstrInfo(); 668 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 669 MachineRegisterInfo *MRI = &MF->getRegInfo(); 670 671 for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; 672 ++OpIdx) { 673 MachineOperand &MO = Instr->getOperand(OpIdx); 674 const TargetRegisterClass *OpRegCstraints = 675 Instr->getRegClassConstraint(OpIdx, TII, TRI); 676 677 // If there's no constraint, there's nothing to do. 678 if (!OpRegCstraints) 679 continue; 680 // If the operand is a frame index, there's nothing to do here. 681 // A frame index operand will resolve correctly during PEI. 682 if (MO.isFI()) 683 continue; 684 685 assert(MO.isReg() && 686 "Operand has register constraints without being a register!"); 687 688 unsigned Reg = MO.getReg(); 689 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 690 if (!OpRegCstraints->contains(Reg)) 691 return false; 692 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && 693 !MRI->constrainRegClass(Reg, OpRegCstraints)) 694 return false; 695 } 696 697 return true; 698} 699 700/// optimizeCompareInstr - Convert the instruction supplying the argument to the 701/// comparison into one that sets the zero bit in the flags register. 702bool AArch64InstrInfo::optimizeCompareInstr( 703 MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, 704 int CmpValue, const MachineRegisterInfo *MRI) const { 705 706 // Replace SUBSWrr with SUBWrr if NZCV is not used. 707 int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true); 708 if (Cmp_NZCV != -1) { 709 unsigned NewOpc; 710 switch (CmpInstr->getOpcode()) { 711 default: 712 return false; 713 case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break; 714 case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break; 715 case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break; 716 case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break; 717 case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break; 718 case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break; 719 case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break; 720 case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break; 721 case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break; 722 case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break; 723 case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break; 724 case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break; 725 case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break; 726 case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break; 727 case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break; 728 case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break; 729 } 730 731 const MCInstrDesc &MCID = get(NewOpc); 732 CmpInstr->setDesc(MCID); 733 CmpInstr->RemoveOperand(Cmp_NZCV); 734 bool succeeded = UpdateOperandRegClass(CmpInstr); 735 (void)succeeded; 736 assert(succeeded && "Some operands reg class are incompatible!"); 737 return true; 738 } 739 740 // Continue only if we have a "ri" where immediate is zero. 741 if (CmpValue != 0 || SrcReg2 != 0) 742 return false; 743 744 // CmpInstr is a Compare instruction if destination register is not used. 745 if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) 746 return false; 747 748 // Get the unique definition of SrcReg. 749 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); 750 if (!MI) 751 return false; 752 753 // We iterate backward, starting from the instruction before CmpInstr and 754 // stop when reaching the definition of the source register or done with the 755 // basic block, to check whether NZCV is used or modified in between. 756 MachineBasicBlock::iterator I = CmpInstr, E = MI, 757 B = CmpInstr->getParent()->begin(); 758 759 // Early exit if CmpInstr is at the beginning of the BB. 760 if (I == B) 761 return false; 762 763 // Check whether the definition of SrcReg is in the same basic block as 764 // Compare. If not, we can't optimize away the Compare. 765 if (MI->getParent() != CmpInstr->getParent()) 766 return false; 767 768 // Check that NZCV isn't set between the comparison instruction and the one we 769 // want to change. 770 const TargetRegisterInfo *TRI = &getRegisterInfo(); 771 for (--I; I != E; --I) { 772 const MachineInstr &Instr = *I; 773 774 if (Instr.modifiesRegister(AArch64::NZCV, TRI) || 775 Instr.readsRegister(AArch64::NZCV, TRI)) 776 // This instruction modifies or uses NZCV after the one we want to 777 // change. We can't do this transformation. 778 return false; 779 if (I == B) 780 // The 'and' is below the comparison instruction. 781 return false; 782 } 783 784 unsigned NewOpc = MI->getOpcode(); 785 switch (MI->getOpcode()) { 786 default: 787 return false; 788 case AArch64::ADDSWrr: 789 case AArch64::ADDSWri: 790 case AArch64::ADDSXrr: 791 case AArch64::ADDSXri: 792 case AArch64::SUBSWrr: 793 case AArch64::SUBSWri: 794 case AArch64::SUBSXrr: 795 case AArch64::SUBSXri: 796 break; 797 case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break; 798 case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break; 799 case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break; 800 case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break; 801 case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break; 802 case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break; 803 case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break; 804 case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break; 805 case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break; 806 case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break; 807 case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break; 808 case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break; 809 case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break; 810 case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break; 811 } 812 813 // Scan forward for the use of NZCV. 814 // When checking against MI: if it's a conditional code requires 815 // checking of V bit, then this is not safe to do. 816 // It is safe to remove CmpInstr if NZCV is redefined or killed. 817 // If we are done with the basic block, we need to check whether NZCV is 818 // live-out. 819 bool IsSafe = false; 820 for (MachineBasicBlock::iterator I = CmpInstr, 821 E = CmpInstr->getParent()->end(); 822 !IsSafe && ++I != E;) { 823 const MachineInstr &Instr = *I; 824 for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; 825 ++IO) { 826 const MachineOperand &MO = Instr.getOperand(IO); 827 if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) { 828 IsSafe = true; 829 break; 830 } 831 if (!MO.isReg() || MO.getReg() != AArch64::NZCV) 832 continue; 833 if (MO.isDef()) { 834 IsSafe = true; 835 break; 836 } 837 838 // Decode the condition code. 839 unsigned Opc = Instr.getOpcode(); 840 AArch64CC::CondCode CC; 841 switch (Opc) { 842 default: 843 return false; 844 case AArch64::Bcc: 845 CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm(); 846 break; 847 case AArch64::CSINVWr: 848 case AArch64::CSINVXr: 849 case AArch64::CSINCWr: 850 case AArch64::CSINCXr: 851 case AArch64::CSELWr: 852 case AArch64::CSELXr: 853 case AArch64::CSNEGWr: 854 case AArch64::CSNEGXr: 855 case AArch64::FCSELSrrr: 856 case AArch64::FCSELDrrr: 857 CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm(); 858 break; 859 } 860 861 // It is not safe to remove Compare instruction if Overflow(V) is used. 862 switch (CC) { 863 default: 864 // NZCV can be used multiple times, we should continue. 865 break; 866 case AArch64CC::VS: 867 case AArch64CC::VC: 868 case AArch64CC::GE: 869 case AArch64CC::LT: 870 case AArch64CC::GT: 871 case AArch64CC::LE: 872 return false; 873 } 874 } 875 } 876 877 // If NZCV is not killed nor re-defined, we should check whether it is 878 // live-out. If it is live-out, do not optimize. 879 if (!IsSafe) { 880 MachineBasicBlock *ParentBlock = CmpInstr->getParent(); 881 for (auto *MBB : ParentBlock->successors()) 882 if (MBB->isLiveIn(AArch64::NZCV)) 883 return false; 884 } 885 886 // Update the instruction to set NZCV. 887 MI->setDesc(get(NewOpc)); 888 CmpInstr->eraseFromParent(); 889 bool succeeded = UpdateOperandRegClass(MI); 890 (void)succeeded; 891 assert(succeeded && "Some operands reg class are incompatible!"); 892 MI->addRegisterDefined(AArch64::NZCV, TRI); 893 return true; 894} 895 896/// Return true if this is this instruction has a non-zero immediate 897bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const { 898 switch (MI->getOpcode()) { 899 default: 900 break; 901 case AArch64::ADDSWrs: 902 case AArch64::ADDSXrs: 903 case AArch64::ADDWrs: 904 case AArch64::ADDXrs: 905 case AArch64::ANDSWrs: 906 case AArch64::ANDSXrs: 907 case AArch64::ANDWrs: 908 case AArch64::ANDXrs: 909 case AArch64::BICSWrs: 910 case AArch64::BICSXrs: 911 case AArch64::BICWrs: 912 case AArch64::BICXrs: 913 case AArch64::CRC32Brr: 914 case AArch64::CRC32CBrr: 915 case AArch64::CRC32CHrr: 916 case AArch64::CRC32CWrr: 917 case AArch64::CRC32CXrr: 918 case AArch64::CRC32Hrr: 919 case AArch64::CRC32Wrr: 920 case AArch64::CRC32Xrr: 921 case AArch64::EONWrs: 922 case AArch64::EONXrs: 923 case AArch64::EORWrs: 924 case AArch64::EORXrs: 925 case AArch64::ORNWrs: 926 case AArch64::ORNXrs: 927 case AArch64::ORRWrs: 928 case AArch64::ORRXrs: 929 case AArch64::SUBSWrs: 930 case AArch64::SUBSXrs: 931 case AArch64::SUBWrs: 932 case AArch64::SUBXrs: 933 if (MI->getOperand(3).isImm()) { 934 unsigned val = MI->getOperand(3).getImm(); 935 return (val != 0); 936 } 937 break; 938 } 939 return false; 940} 941 942/// Return true if this is this instruction has a non-zero immediate 943bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const { 944 switch (MI->getOpcode()) { 945 default: 946 break; 947 case AArch64::ADDSWrx: 948 case AArch64::ADDSXrx: 949 case AArch64::ADDSXrx64: 950 case AArch64::ADDWrx: 951 case AArch64::ADDXrx: 952 case AArch64::ADDXrx64: 953 case AArch64::SUBSWrx: 954 case AArch64::SUBSXrx: 955 case AArch64::SUBSXrx64: 956 case AArch64::SUBWrx: 957 case AArch64::SUBXrx: 958 case AArch64::SUBXrx64: 959 if (MI->getOperand(3).isImm()) { 960 unsigned val = MI->getOperand(3).getImm(); 961 return (val != 0); 962 } 963 break; 964 } 965 966 return false; 967} 968 969// Return true if this instruction simply sets its single destination register 970// to zero. This is equivalent to a register rename of the zero-register. 971bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const { 972 switch (MI->getOpcode()) { 973 default: 974 break; 975 case AArch64::MOVZWi: 976 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0) 977 if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { 978 assert(MI->getDesc().getNumOperands() == 3 && 979 MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); 980 return true; 981 } 982 break; 983 case AArch64::ANDWri: // and Rd, Rzr, #imm 984 return MI->getOperand(1).getReg() == AArch64::WZR; 985 case AArch64::ANDXri: 986 return MI->getOperand(1).getReg() == AArch64::XZR; 987 case TargetOpcode::COPY: 988 return MI->getOperand(1).getReg() == AArch64::WZR; 989 } 990 return false; 991} 992 993// Return true if this instruction simply renames a general register without 994// modifying bits. 995bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const { 996 switch (MI->getOpcode()) { 997 default: 998 break; 999 case TargetOpcode::COPY: { 1000 // GPR32 copies will by lowered to ORRXrs 1001 unsigned DstReg = MI->getOperand(0).getReg(); 1002 return (AArch64::GPR32RegClass.contains(DstReg) || 1003 AArch64::GPR64RegClass.contains(DstReg)); 1004 } 1005 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) 1006 if (MI->getOperand(1).getReg() == AArch64::XZR) { 1007 assert(MI->getDesc().getNumOperands() == 4 && 1008 MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); 1009 return true; 1010 } 1011 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0) 1012 if (MI->getOperand(2).getImm() == 0) { 1013 assert(MI->getDesc().getNumOperands() == 4 && 1014 MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); 1015 return true; 1016 } 1017 } 1018 return false; 1019} 1020 1021// Return true if this instruction simply renames a general register without 1022// modifying bits. 1023bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const { 1024 switch (MI->getOpcode()) { 1025 default: 1026 break; 1027 case TargetOpcode::COPY: { 1028 // FPR64 copies will by lowered to ORR.16b 1029 unsigned DstReg = MI->getOperand(0).getReg(); 1030 return (AArch64::FPR64RegClass.contains(DstReg) || 1031 AArch64::FPR128RegClass.contains(DstReg)); 1032 } 1033 case AArch64::ORRv16i8: 1034 if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { 1035 assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && 1036 "invalid ORRv16i8 operands"); 1037 return true; 1038 } 1039 } 1040 return false; 1041} 1042 1043unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, 1044 int &FrameIndex) const { 1045 switch (MI->getOpcode()) { 1046 default: 1047 break; 1048 case AArch64::LDRWui: 1049 case AArch64::LDRXui: 1050 case AArch64::LDRBui: 1051 case AArch64::LDRHui: 1052 case AArch64::LDRSui: 1053 case AArch64::LDRDui: 1054 case AArch64::LDRQui: 1055 if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && 1056 MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { 1057 FrameIndex = MI->getOperand(1).getIndex(); 1058 return MI->getOperand(0).getReg(); 1059 } 1060 break; 1061 } 1062 1063 return 0; 1064} 1065 1066unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, 1067 int &FrameIndex) const { 1068 switch (MI->getOpcode()) { 1069 default: 1070 break; 1071 case AArch64::STRWui: 1072 case AArch64::STRXui: 1073 case AArch64::STRBui: 1074 case AArch64::STRHui: 1075 case AArch64::STRSui: 1076 case AArch64::STRDui: 1077 case AArch64::STRQui: 1078 if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && 1079 MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { 1080 FrameIndex = MI->getOperand(1).getIndex(); 1081 return MI->getOperand(0).getReg(); 1082 } 1083 break; 1084 } 1085 return 0; 1086} 1087 1088/// Return true if this is load/store scales or extends its register offset. 1089/// This refers to scaling a dynamic index as opposed to scaled immediates. 1090/// MI should be a memory op that allows scaled addressing. 1091bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const { 1092 switch (MI->getOpcode()) { 1093 default: 1094 break; 1095 case AArch64::LDRBBroW: 1096 case AArch64::LDRBroW: 1097 case AArch64::LDRDroW: 1098 case AArch64::LDRHHroW: 1099 case AArch64::LDRHroW: 1100 case AArch64::LDRQroW: 1101 case AArch64::LDRSBWroW: 1102 case AArch64::LDRSBXroW: 1103 case AArch64::LDRSHWroW: 1104 case AArch64::LDRSHXroW: 1105 case AArch64::LDRSWroW: 1106 case AArch64::LDRSroW: 1107 case AArch64::LDRWroW: 1108 case AArch64::LDRXroW: 1109 case AArch64::STRBBroW: 1110 case AArch64::STRBroW: 1111 case AArch64::STRDroW: 1112 case AArch64::STRHHroW: 1113 case AArch64::STRHroW: 1114 case AArch64::STRQroW: 1115 case AArch64::STRSroW: 1116 case AArch64::STRWroW: 1117 case AArch64::STRXroW: 1118 case AArch64::LDRBBroX: 1119 case AArch64::LDRBroX: 1120 case AArch64::LDRDroX: 1121 case AArch64::LDRHHroX: 1122 case AArch64::LDRHroX: 1123 case AArch64::LDRQroX: 1124 case AArch64::LDRSBWroX: 1125 case AArch64::LDRSBXroX: 1126 case AArch64::LDRSHWroX: 1127 case AArch64::LDRSHXroX: 1128 case AArch64::LDRSWroX: 1129 case AArch64::LDRSroX: 1130 case AArch64::LDRWroX: 1131 case AArch64::LDRXroX: 1132 case AArch64::STRBBroX: 1133 case AArch64::STRBroX: 1134 case AArch64::STRDroX: 1135 case AArch64::STRHHroX: 1136 case AArch64::STRHroX: 1137 case AArch64::STRQroX: 1138 case AArch64::STRSroX: 1139 case AArch64::STRWroX: 1140 case AArch64::STRXroX: 1141 1142 unsigned Val = MI->getOperand(3).getImm(); 1143 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); 1144 return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); 1145 } 1146 return false; 1147} 1148 1149/// Check all MachineMemOperands for a hint to suppress pairing. 1150bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { 1151 assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && 1152 "Too many target MO flags"); 1153 for (auto *MM : MI->memoperands()) { 1154 if (MM->getFlags() & 1155 (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) { 1156 return true; 1157 } 1158 } 1159 return false; 1160} 1161 1162/// Set a flag on the first MachineMemOperand to suppress pairing. 1163void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const { 1164 if (MI->memoperands_empty()) 1165 return; 1166 1167 assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && 1168 "Too many target MO flags"); 1169 (*MI->memoperands_begin()) 1170 ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); 1171} 1172 1173bool 1174AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, 1175 unsigned &Offset, 1176 const TargetRegisterInfo *TRI) const { 1177 switch (LdSt->getOpcode()) { 1178 default: 1179 return false; 1180 case AArch64::STRSui: 1181 case AArch64::STRDui: 1182 case AArch64::STRQui: 1183 case AArch64::STRXui: 1184 case AArch64::STRWui: 1185 case AArch64::LDRSui: 1186 case AArch64::LDRDui: 1187 case AArch64::LDRQui: 1188 case AArch64::LDRXui: 1189 case AArch64::LDRWui: 1190 if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) 1191 return false; 1192 BaseReg = LdSt->getOperand(1).getReg(); 1193 MachineFunction &MF = *LdSt->getParent()->getParent(); 1194 unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize(); 1195 Offset = LdSt->getOperand(2).getImm() * Width; 1196 return true; 1197 }; 1198} 1199 1200/// Detect opportunities for ldp/stp formation. 1201/// 1202/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. 1203bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, 1204 MachineInstr *SecondLdSt, 1205 unsigned NumLoads) const { 1206 // Only cluster up to a single pair. 1207 if (NumLoads > 1) 1208 return false; 1209 if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) 1210 return false; 1211 // getLdStBaseRegImmOfs guarantees that oper 2 isImm. 1212 unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); 1213 // Allow 6 bits of positive range. 1214 if (Ofs1 > 64) 1215 return false; 1216 // The caller should already have ordered First/SecondLdSt by offset. 1217 unsigned Ofs2 = SecondLdSt->getOperand(2).getImm(); 1218 return Ofs1 + 1 == Ofs2; 1219} 1220 1221bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, 1222 MachineInstr *Second) const { 1223 // Cyclone can fuse CMN, CMP followed by Bcc. 1224 1225 // FIXME: B0 can also fuse: 1226 // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 1227 if (Second->getOpcode() != AArch64::Bcc) 1228 return false; 1229 switch (First->getOpcode()) { 1230 default: 1231 return false; 1232 case AArch64::SUBSWri: 1233 case AArch64::ADDSWri: 1234 case AArch64::ANDSWri: 1235 case AArch64::SUBSXri: 1236 case AArch64::ADDSXri: 1237 case AArch64::ANDSXri: 1238 return true; 1239 } 1240} 1241 1242MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, 1243 int FrameIx, 1244 uint64_t Offset, 1245 const MDNode *MDPtr, 1246 DebugLoc DL) const { 1247 MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) 1248 .addFrameIndex(FrameIx) 1249 .addImm(0) 1250 .addImm(Offset) 1251 .addMetadata(MDPtr); 1252 return &*MIB; 1253} 1254 1255static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, 1256 unsigned Reg, unsigned SubIdx, 1257 unsigned State, 1258 const TargetRegisterInfo *TRI) { 1259 if (!SubIdx) 1260 return MIB.addReg(Reg, State); 1261 1262 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1263 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); 1264 return MIB.addReg(Reg, State, SubIdx); 1265} 1266 1267static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, 1268 unsigned NumRegs) { 1269 // We really want the positive remainder mod 32 here, that happens to be 1270 // easily obtainable with a mask. 1271 return ((DestReg - SrcReg) & 0x1f) < NumRegs; 1272} 1273 1274void AArch64InstrInfo::copyPhysRegTuple( 1275 MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, 1276 unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, 1277 llvm::ArrayRef<unsigned> Indices) const { 1278 assert(Subtarget.hasNEON() && 1279 "Unexpected register copy without NEON"); 1280 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1281 uint16_t DestEncoding = TRI->getEncodingValue(DestReg); 1282 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); 1283 unsigned NumRegs = Indices.size(); 1284 1285 int SubReg = 0, End = NumRegs, Incr = 1; 1286 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { 1287 SubReg = NumRegs - 1; 1288 End = -1; 1289 Incr = -1; 1290 } 1291 1292 for (; SubReg != End; SubReg += Incr) { 1293 const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); 1294 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); 1295 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); 1296 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); 1297 } 1298} 1299 1300void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 1301 MachineBasicBlock::iterator I, DebugLoc DL, 1302 unsigned DestReg, unsigned SrcReg, 1303 bool KillSrc) const { 1304 if (AArch64::GPR32spRegClass.contains(DestReg) && 1305 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) { 1306 const TargetRegisterInfo *TRI = &getRegisterInfo(); 1307 1308 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { 1309 // If either operand is WSP, expand to ADD #0. 1310 if (Subtarget.hasZeroCycleRegMove()) { 1311 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. 1312 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 1313 &AArch64::GPR64spRegClass); 1314 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 1315 &AArch64::GPR64spRegClass); 1316 // This instruction is reading and writing X registers. This may upset 1317 // the register scavenger and machine verifier, so we need to indicate 1318 // that we are reading an undefined value from SrcRegX, but a proper 1319 // value from SrcReg. 1320 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX) 1321 .addReg(SrcRegX, RegState::Undef) 1322 .addImm(0) 1323 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 1324 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 1325 } else { 1326 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg) 1327 .addReg(SrcReg, getKillRegState(KillSrc)) 1328 .addImm(0) 1329 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1330 } 1331 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) { 1332 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm( 1333 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1334 } else { 1335 if (Subtarget.hasZeroCycleRegMove()) { 1336 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. 1337 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32, 1338 &AArch64::GPR64spRegClass); 1339 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32, 1340 &AArch64::GPR64spRegClass); 1341 // This instruction is reading and writing X registers. This may upset 1342 // the register scavenger and machine verifier, so we need to indicate 1343 // that we are reading an undefined value from SrcRegX, but a proper 1344 // value from SrcReg. 1345 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) 1346 .addReg(AArch64::XZR) 1347 .addReg(SrcRegX, RegState::Undef) 1348 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); 1349 } else { 1350 // Otherwise, expand to ORR WZR. 1351 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) 1352 .addReg(AArch64::WZR) 1353 .addReg(SrcReg, getKillRegState(KillSrc)); 1354 } 1355 } 1356 return; 1357 } 1358 1359 if (AArch64::GPR64spRegClass.contains(DestReg) && 1360 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { 1361 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { 1362 // If either operand is SP, expand to ADD #0. 1363 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) 1364 .addReg(SrcReg, getKillRegState(KillSrc)) 1365 .addImm(0) 1366 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1367 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { 1368 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( 1369 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); 1370 } else { 1371 // Otherwise, expand to ORR XZR. 1372 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) 1373 .addReg(AArch64::XZR) 1374 .addReg(SrcReg, getKillRegState(KillSrc)); 1375 } 1376 return; 1377 } 1378 1379 // Copy a DDDD register quad by copying the individual sub-registers. 1380 if (AArch64::DDDDRegClass.contains(DestReg) && 1381 AArch64::DDDDRegClass.contains(SrcReg)) { 1382 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 1383 AArch64::dsub2, AArch64::dsub3 }; 1384 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 1385 Indices); 1386 return; 1387 } 1388 1389 // Copy a DDD register triple by copying the individual sub-registers. 1390 if (AArch64::DDDRegClass.contains(DestReg) && 1391 AArch64::DDDRegClass.contains(SrcReg)) { 1392 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, 1393 AArch64::dsub2 }; 1394 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 1395 Indices); 1396 return; 1397 } 1398 1399 // Copy a DD register pair by copying the individual sub-registers. 1400 if (AArch64::DDRegClass.contains(DestReg) && 1401 AArch64::DDRegClass.contains(SrcReg)) { 1402 static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; 1403 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, 1404 Indices); 1405 return; 1406 } 1407 1408 // Copy a QQQQ register quad by copying the individual sub-registers. 1409 if (AArch64::QQQQRegClass.contains(DestReg) && 1410 AArch64::QQQQRegClass.contains(SrcReg)) { 1411 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 1412 AArch64::qsub2, AArch64::qsub3 }; 1413 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 1414 Indices); 1415 return; 1416 } 1417 1418 // Copy a QQQ register triple by copying the individual sub-registers. 1419 if (AArch64::QQQRegClass.contains(DestReg) && 1420 AArch64::QQQRegClass.contains(SrcReg)) { 1421 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, 1422 AArch64::qsub2 }; 1423 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 1424 Indices); 1425 return; 1426 } 1427 1428 // Copy a QQ register pair by copying the individual sub-registers. 1429 if (AArch64::QQRegClass.contains(DestReg) && 1430 AArch64::QQRegClass.contains(SrcReg)) { 1431 static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; 1432 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, 1433 Indices); 1434 return; 1435 } 1436 1437 if (AArch64::FPR128RegClass.contains(DestReg) && 1438 AArch64::FPR128RegClass.contains(SrcReg)) { 1439 if(Subtarget.hasNEON()) { 1440 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 1441 .addReg(SrcReg) 1442 .addReg(SrcReg, getKillRegState(KillSrc)); 1443 } else { 1444 BuildMI(MBB, I, DL, get(AArch64::STRQpre)) 1445 .addReg(AArch64::SP, RegState::Define) 1446 .addReg(SrcReg, getKillRegState(KillSrc)) 1447 .addReg(AArch64::SP) 1448 .addImm(-16); 1449 BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) 1450 .addReg(AArch64::SP, RegState::Define) 1451 .addReg(DestReg, RegState::Define) 1452 .addReg(AArch64::SP) 1453 .addImm(16); 1454 } 1455 return; 1456 } 1457 1458 if (AArch64::FPR64RegClass.contains(DestReg) && 1459 AArch64::FPR64RegClass.contains(SrcReg)) { 1460 if(Subtarget.hasNEON()) { 1461 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, 1462 &AArch64::FPR128RegClass); 1463 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, 1464 &AArch64::FPR128RegClass); 1465 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 1466 .addReg(SrcReg) 1467 .addReg(SrcReg, getKillRegState(KillSrc)); 1468 } else { 1469 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) 1470 .addReg(SrcReg, getKillRegState(KillSrc)); 1471 } 1472 return; 1473 } 1474 1475 if (AArch64::FPR32RegClass.contains(DestReg) && 1476 AArch64::FPR32RegClass.contains(SrcReg)) { 1477 if(Subtarget.hasNEON()) { 1478 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, 1479 &AArch64::FPR128RegClass); 1480 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, 1481 &AArch64::FPR128RegClass); 1482 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 1483 .addReg(SrcReg) 1484 .addReg(SrcReg, getKillRegState(KillSrc)); 1485 } else { 1486 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 1487 .addReg(SrcReg, getKillRegState(KillSrc)); 1488 } 1489 return; 1490 } 1491 1492 if (AArch64::FPR16RegClass.contains(DestReg) && 1493 AArch64::FPR16RegClass.contains(SrcReg)) { 1494 if(Subtarget.hasNEON()) { 1495 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 1496 &AArch64::FPR128RegClass); 1497 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 1498 &AArch64::FPR128RegClass); 1499 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 1500 .addReg(SrcReg) 1501 .addReg(SrcReg, getKillRegState(KillSrc)); 1502 } else { 1503 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, 1504 &AArch64::FPR32RegClass); 1505 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, 1506 &AArch64::FPR32RegClass); 1507 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 1508 .addReg(SrcReg, getKillRegState(KillSrc)); 1509 } 1510 return; 1511 } 1512 1513 if (AArch64::FPR8RegClass.contains(DestReg) && 1514 AArch64::FPR8RegClass.contains(SrcReg)) { 1515 if(Subtarget.hasNEON()) { 1516 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 1517 &AArch64::FPR128RegClass); 1518 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 1519 &AArch64::FPR128RegClass); 1520 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) 1521 .addReg(SrcReg) 1522 .addReg(SrcReg, getKillRegState(KillSrc)); 1523 } else { 1524 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, 1525 &AArch64::FPR32RegClass); 1526 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, 1527 &AArch64::FPR32RegClass); 1528 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) 1529 .addReg(SrcReg, getKillRegState(KillSrc)); 1530 } 1531 return; 1532 } 1533 1534 // Copies between GPR64 and FPR64. 1535 if (AArch64::FPR64RegClass.contains(DestReg) && 1536 AArch64::GPR64RegClass.contains(SrcReg)) { 1537 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) 1538 .addReg(SrcReg, getKillRegState(KillSrc)); 1539 return; 1540 } 1541 if (AArch64::GPR64RegClass.contains(DestReg) && 1542 AArch64::FPR64RegClass.contains(SrcReg)) { 1543 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) 1544 .addReg(SrcReg, getKillRegState(KillSrc)); 1545 return; 1546 } 1547 // Copies between GPR32 and FPR32. 1548 if (AArch64::FPR32RegClass.contains(DestReg) && 1549 AArch64::GPR32RegClass.contains(SrcReg)) { 1550 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) 1551 .addReg(SrcReg, getKillRegState(KillSrc)); 1552 return; 1553 } 1554 if (AArch64::GPR32RegClass.contains(DestReg) && 1555 AArch64::FPR32RegClass.contains(SrcReg)) { 1556 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) 1557 .addReg(SrcReg, getKillRegState(KillSrc)); 1558 return; 1559 } 1560 1561 if (DestReg == AArch64::NZCV) { 1562 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); 1563 BuildMI(MBB, I, DL, get(AArch64::MSR)) 1564 .addImm(AArch64SysReg::NZCV) 1565 .addReg(SrcReg, getKillRegState(KillSrc)) 1566 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); 1567 return; 1568 } 1569 1570 if (SrcReg == AArch64::NZCV) { 1571 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); 1572 BuildMI(MBB, I, DL, get(AArch64::MRS)) 1573 .addReg(DestReg) 1574 .addImm(AArch64SysReg::NZCV) 1575 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); 1576 return; 1577 } 1578 1579 llvm_unreachable("unimplemented reg-to-reg copy"); 1580} 1581 1582void AArch64InstrInfo::storeRegToStackSlot( 1583 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, 1584 bool isKill, int FI, const TargetRegisterClass *RC, 1585 const TargetRegisterInfo *TRI) const { 1586 DebugLoc DL; 1587 if (MBBI != MBB.end()) 1588 DL = MBBI->getDebugLoc(); 1589 MachineFunction &MF = *MBB.getParent(); 1590 MachineFrameInfo &MFI = *MF.getFrameInfo(); 1591 unsigned Align = MFI.getObjectAlignment(FI); 1592 1593 MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); 1594 MachineMemOperand *MMO = MF.getMachineMemOperand( 1595 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); 1596 unsigned Opc = 0; 1597 bool Offset = true; 1598 switch (RC->getSize()) { 1599 case 1: 1600 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 1601 Opc = AArch64::STRBui; 1602 break; 1603 case 2: 1604 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 1605 Opc = AArch64::STRHui; 1606 break; 1607 case 4: 1608 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 1609 Opc = AArch64::STRWui; 1610 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 1611 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); 1612 else 1613 assert(SrcReg != AArch64::WSP); 1614 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 1615 Opc = AArch64::STRSui; 1616 break; 1617 case 8: 1618 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 1619 Opc = AArch64::STRXui; 1620 if (TargetRegisterInfo::isVirtualRegister(SrcReg)) 1621 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 1622 else 1623 assert(SrcReg != AArch64::SP); 1624 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 1625 Opc = AArch64::STRDui; 1626 break; 1627 case 16: 1628 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 1629 Opc = AArch64::STRQui; 1630 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 1631 assert(Subtarget.hasNEON() && 1632 "Unexpected register store without NEON"); 1633 Opc = AArch64::ST1Twov1d, Offset = false; 1634 } 1635 break; 1636 case 24: 1637 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 1638 assert(Subtarget.hasNEON() && 1639 "Unexpected register store without NEON"); 1640 Opc = AArch64::ST1Threev1d, Offset = false; 1641 } 1642 break; 1643 case 32: 1644 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 1645 assert(Subtarget.hasNEON() && 1646 "Unexpected register store without NEON"); 1647 Opc = AArch64::ST1Fourv1d, Offset = false; 1648 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 1649 assert(Subtarget.hasNEON() && 1650 "Unexpected register store without NEON"); 1651 Opc = AArch64::ST1Twov2d, Offset = false; 1652 } 1653 break; 1654 case 48: 1655 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 1656 assert(Subtarget.hasNEON() && 1657 "Unexpected register store without NEON"); 1658 Opc = AArch64::ST1Threev2d, Offset = false; 1659 } 1660 break; 1661 case 64: 1662 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 1663 assert(Subtarget.hasNEON() && 1664 "Unexpected register store without NEON"); 1665 Opc = AArch64::ST1Fourv2d, Offset = false; 1666 } 1667 break; 1668 } 1669 assert(Opc && "Unknown register class"); 1670 1671 const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) 1672 .addReg(SrcReg, getKillRegState(isKill)) 1673 .addFrameIndex(FI); 1674 1675 if (Offset) 1676 MI.addImm(0); 1677 MI.addMemOperand(MMO); 1678} 1679 1680void AArch64InstrInfo::loadRegFromStackSlot( 1681 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, 1682 int FI, const TargetRegisterClass *RC, 1683 const TargetRegisterInfo *TRI) const { 1684 DebugLoc DL; 1685 if (MBBI != MBB.end()) 1686 DL = MBBI->getDebugLoc(); 1687 MachineFunction &MF = *MBB.getParent(); 1688 MachineFrameInfo &MFI = *MF.getFrameInfo(); 1689 unsigned Align = MFI.getObjectAlignment(FI); 1690 MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); 1691 MachineMemOperand *MMO = MF.getMachineMemOperand( 1692 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); 1693 1694 unsigned Opc = 0; 1695 bool Offset = true; 1696 switch (RC->getSize()) { 1697 case 1: 1698 if (AArch64::FPR8RegClass.hasSubClassEq(RC)) 1699 Opc = AArch64::LDRBui; 1700 break; 1701 case 2: 1702 if (AArch64::FPR16RegClass.hasSubClassEq(RC)) 1703 Opc = AArch64::LDRHui; 1704 break; 1705 case 4: 1706 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { 1707 Opc = AArch64::LDRWui; 1708 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 1709 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); 1710 else 1711 assert(DestReg != AArch64::WSP); 1712 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) 1713 Opc = AArch64::LDRSui; 1714 break; 1715 case 8: 1716 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { 1717 Opc = AArch64::LDRXui; 1718 if (TargetRegisterInfo::isVirtualRegister(DestReg)) 1719 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); 1720 else 1721 assert(DestReg != AArch64::SP); 1722 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) 1723 Opc = AArch64::LDRDui; 1724 break; 1725 case 16: 1726 if (AArch64::FPR128RegClass.hasSubClassEq(RC)) 1727 Opc = AArch64::LDRQui; 1728 else if (AArch64::DDRegClass.hasSubClassEq(RC)) { 1729 assert(Subtarget.hasNEON() && 1730 "Unexpected register load without NEON"); 1731 Opc = AArch64::LD1Twov1d, Offset = false; 1732 } 1733 break; 1734 case 24: 1735 if (AArch64::DDDRegClass.hasSubClassEq(RC)) { 1736 assert(Subtarget.hasNEON() && 1737 "Unexpected register load without NEON"); 1738 Opc = AArch64::LD1Threev1d, Offset = false; 1739 } 1740 break; 1741 case 32: 1742 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { 1743 assert(Subtarget.hasNEON() && 1744 "Unexpected register load without NEON"); 1745 Opc = AArch64::LD1Fourv1d, Offset = false; 1746 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { 1747 assert(Subtarget.hasNEON() && 1748 "Unexpected register load without NEON"); 1749 Opc = AArch64::LD1Twov2d, Offset = false; 1750 } 1751 break; 1752 case 48: 1753 if (AArch64::QQQRegClass.hasSubClassEq(RC)) { 1754 assert(Subtarget.hasNEON() && 1755 "Unexpected register load without NEON"); 1756 Opc = AArch64::LD1Threev2d, Offset = false; 1757 } 1758 break; 1759 case 64: 1760 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { 1761 assert(Subtarget.hasNEON() && 1762 "Unexpected register load without NEON"); 1763 Opc = AArch64::LD1Fourv2d, Offset = false; 1764 } 1765 break; 1766 } 1767 assert(Opc && "Unknown register class"); 1768 1769 const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) 1770 .addReg(DestReg, getDefRegState(true)) 1771 .addFrameIndex(FI); 1772 if (Offset) 1773 MI.addImm(0); 1774 MI.addMemOperand(MMO); 1775} 1776 1777void llvm::emitFrameOffset(MachineBasicBlock &MBB, 1778 MachineBasicBlock::iterator MBBI, DebugLoc DL, 1779 unsigned DestReg, unsigned SrcReg, int Offset, 1780 const TargetInstrInfo *TII, 1781 MachineInstr::MIFlag Flag, bool SetNZCV) { 1782 if (DestReg == SrcReg && Offset == 0) 1783 return; 1784 1785 bool isSub = Offset < 0; 1786 if (isSub) 1787 Offset = -Offset; 1788 1789 // FIXME: If the offset won't fit in 24-bits, compute the offset into a 1790 // scratch register. If DestReg is a virtual register, use it as the 1791 // scratch register; otherwise, create a new virtual register (to be 1792 // replaced by the scavenger at the end of PEI). That case can be optimized 1793 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch 1794 // register can be loaded with offset%8 and the add/sub can use an extending 1795 // instruction with LSL#3. 1796 // Currently the function handles any offsets but generates a poor sequence 1797 // of code. 1798 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); 1799 1800 unsigned Opc; 1801 if (SetNZCV) 1802 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; 1803 else 1804 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; 1805 const unsigned MaxEncoding = 0xfff; 1806 const unsigned ShiftSize = 12; 1807 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; 1808 while (((unsigned)Offset) >= (1 << ShiftSize)) { 1809 unsigned ThisVal; 1810 if (((unsigned)Offset) > MaxEncodableValue) { 1811 ThisVal = MaxEncodableValue; 1812 } else { 1813 ThisVal = Offset & MaxEncodableValue; 1814 } 1815 assert((ThisVal >> ShiftSize) <= MaxEncoding && 1816 "Encoding cannot handle value that big"); 1817 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 1818 .addReg(SrcReg) 1819 .addImm(ThisVal >> ShiftSize) 1820 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) 1821 .setMIFlag(Flag); 1822 1823 SrcReg = DestReg; 1824 Offset -= ThisVal; 1825 if (Offset == 0) 1826 return; 1827 } 1828 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) 1829 .addReg(SrcReg) 1830 .addImm(Offset) 1831 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) 1832 .setMIFlag(Flag); 1833} 1834 1835MachineInstr * 1836AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, 1837 const SmallVectorImpl<unsigned> &Ops, 1838 int FrameIndex) const { 1839 // This is a bit of a hack. Consider this instruction: 1840 // 1841 // %vreg0<def> = COPY %SP; GPR64all:%vreg0 1842 // 1843 // We explicitly chose GPR64all for the virtual register so such a copy might 1844 // be eliminated by RegisterCoalescer. However, that may not be possible, and 1845 // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all 1846 // register class, TargetInstrInfo::foldMemoryOperand() is going to try. 1847 // 1848 // To prevent that, we are going to constrain the %vreg0 register class here. 1849 // 1850 // <rdar://problem/11522048> 1851 // 1852 if (MI->isCopy()) { 1853 unsigned DstReg = MI->getOperand(0).getReg(); 1854 unsigned SrcReg = MI->getOperand(1).getReg(); 1855 if (SrcReg == AArch64::SP && 1856 TargetRegisterInfo::isVirtualRegister(DstReg)) { 1857 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); 1858 return nullptr; 1859 } 1860 if (DstReg == AArch64::SP && 1861 TargetRegisterInfo::isVirtualRegister(SrcReg)) { 1862 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); 1863 return nullptr; 1864 } 1865 } 1866 1867 // Cannot fold. 1868 return nullptr; 1869} 1870 1871int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, 1872 bool *OutUseUnscaledOp, 1873 unsigned *OutUnscaledOp, 1874 int *EmittableOffset) { 1875 int Scale = 1; 1876 bool IsSigned = false; 1877 // The ImmIdx should be changed case by case if it is not 2. 1878 unsigned ImmIdx = 2; 1879 unsigned UnscaledOp = 0; 1880 // Set output values in case of early exit. 1881 if (EmittableOffset) 1882 *EmittableOffset = 0; 1883 if (OutUseUnscaledOp) 1884 *OutUseUnscaledOp = false; 1885 if (OutUnscaledOp) 1886 *OutUnscaledOp = 0; 1887 switch (MI.getOpcode()) { 1888 default: 1889 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); 1890 // Vector spills/fills can't take an immediate offset. 1891 case AArch64::LD1Twov2d: 1892 case AArch64::LD1Threev2d: 1893 case AArch64::LD1Fourv2d: 1894 case AArch64::LD1Twov1d: 1895 case AArch64::LD1Threev1d: 1896 case AArch64::LD1Fourv1d: 1897 case AArch64::ST1Twov2d: 1898 case AArch64::ST1Threev2d: 1899 case AArch64::ST1Fourv2d: 1900 case AArch64::ST1Twov1d: 1901 case AArch64::ST1Threev1d: 1902 case AArch64::ST1Fourv1d: 1903 return AArch64FrameOffsetCannotUpdate; 1904 case AArch64::PRFMui: 1905 Scale = 8; 1906 UnscaledOp = AArch64::PRFUMi; 1907 break; 1908 case AArch64::LDRXui: 1909 Scale = 8; 1910 UnscaledOp = AArch64::LDURXi; 1911 break; 1912 case AArch64::LDRWui: 1913 Scale = 4; 1914 UnscaledOp = AArch64::LDURWi; 1915 break; 1916 case AArch64::LDRBui: 1917 Scale = 1; 1918 UnscaledOp = AArch64::LDURBi; 1919 break; 1920 case AArch64::LDRHui: 1921 Scale = 2; 1922 UnscaledOp = AArch64::LDURHi; 1923 break; 1924 case AArch64::LDRSui: 1925 Scale = 4; 1926 UnscaledOp = AArch64::LDURSi; 1927 break; 1928 case AArch64::LDRDui: 1929 Scale = 8; 1930 UnscaledOp = AArch64::LDURDi; 1931 break; 1932 case AArch64::LDRQui: 1933 Scale = 16; 1934 UnscaledOp = AArch64::LDURQi; 1935 break; 1936 case AArch64::LDRBBui: 1937 Scale = 1; 1938 UnscaledOp = AArch64::LDURBBi; 1939 break; 1940 case AArch64::LDRHHui: 1941 Scale = 2; 1942 UnscaledOp = AArch64::LDURHHi; 1943 break; 1944 case AArch64::LDRSBXui: 1945 Scale = 1; 1946 UnscaledOp = AArch64::LDURSBXi; 1947 break; 1948 case AArch64::LDRSBWui: 1949 Scale = 1; 1950 UnscaledOp = AArch64::LDURSBWi; 1951 break; 1952 case AArch64::LDRSHXui: 1953 Scale = 2; 1954 UnscaledOp = AArch64::LDURSHXi; 1955 break; 1956 case AArch64::LDRSHWui: 1957 Scale = 2; 1958 UnscaledOp = AArch64::LDURSHWi; 1959 break; 1960 case AArch64::LDRSWui: 1961 Scale = 4; 1962 UnscaledOp = AArch64::LDURSWi; 1963 break; 1964 1965 case AArch64::STRXui: 1966 Scale = 8; 1967 UnscaledOp = AArch64::STURXi; 1968 break; 1969 case AArch64::STRWui: 1970 Scale = 4; 1971 UnscaledOp = AArch64::STURWi; 1972 break; 1973 case AArch64::STRBui: 1974 Scale = 1; 1975 UnscaledOp = AArch64::STURBi; 1976 break; 1977 case AArch64::STRHui: 1978 Scale = 2; 1979 UnscaledOp = AArch64::STURHi; 1980 break; 1981 case AArch64::STRSui: 1982 Scale = 4; 1983 UnscaledOp = AArch64::STURSi; 1984 break; 1985 case AArch64::STRDui: 1986 Scale = 8; 1987 UnscaledOp = AArch64::STURDi; 1988 break; 1989 case AArch64::STRQui: 1990 Scale = 16; 1991 UnscaledOp = AArch64::STURQi; 1992 break; 1993 case AArch64::STRBBui: 1994 Scale = 1; 1995 UnscaledOp = AArch64::STURBBi; 1996 break; 1997 case AArch64::STRHHui: 1998 Scale = 2; 1999 UnscaledOp = AArch64::STURHHi; 2000 break; 2001 2002 case AArch64::LDPXi: 2003 case AArch64::LDPDi: 2004 case AArch64::STPXi: 2005 case AArch64::STPDi: 2006 IsSigned = true; 2007 Scale = 8; 2008 break; 2009 case AArch64::LDPQi: 2010 case AArch64::STPQi: 2011 IsSigned = true; 2012 Scale = 16; 2013 break; 2014 case AArch64::LDPWi: 2015 case AArch64::LDPSi: 2016 case AArch64::STPWi: 2017 case AArch64::STPSi: 2018 IsSigned = true; 2019 Scale = 4; 2020 break; 2021 2022 case AArch64::LDURXi: 2023 case AArch64::LDURWi: 2024 case AArch64::LDURBi: 2025 case AArch64::LDURHi: 2026 case AArch64::LDURSi: 2027 case AArch64::LDURDi: 2028 case AArch64::LDURQi: 2029 case AArch64::LDURHHi: 2030 case AArch64::LDURBBi: 2031 case AArch64::LDURSBXi: 2032 case AArch64::LDURSBWi: 2033 case AArch64::LDURSHXi: 2034 case AArch64::LDURSHWi: 2035 case AArch64::LDURSWi: 2036 case AArch64::STURXi: 2037 case AArch64::STURWi: 2038 case AArch64::STURBi: 2039 case AArch64::STURHi: 2040 case AArch64::STURSi: 2041 case AArch64::STURDi: 2042 case AArch64::STURQi: 2043 case AArch64::STURBBi: 2044 case AArch64::STURHHi: 2045 Scale = 1; 2046 break; 2047 } 2048 2049 Offset += MI.getOperand(ImmIdx).getImm() * Scale; 2050 2051 bool useUnscaledOp = false; 2052 // If the offset doesn't match the scale, we rewrite the instruction to 2053 // use the unscaled instruction instead. Likewise, if we have a negative 2054 // offset (and have an unscaled op to use). 2055 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) 2056 useUnscaledOp = true; 2057 2058 // Use an unscaled addressing mode if the instruction has a negative offset 2059 // (or if the instruction is already using an unscaled addressing mode). 2060 unsigned MaskBits; 2061 if (IsSigned) { 2062 // ldp/stp instructions. 2063 MaskBits = 7; 2064 Offset /= Scale; 2065 } else if (UnscaledOp == 0 || useUnscaledOp) { 2066 MaskBits = 9; 2067 IsSigned = true; 2068 Scale = 1; 2069 } else { 2070 MaskBits = 12; 2071 IsSigned = false; 2072 Offset /= Scale; 2073 } 2074 2075 // Attempt to fold address computation. 2076 int MaxOff = (1 << (MaskBits - IsSigned)) - 1; 2077 int MinOff = (IsSigned ? (-MaxOff - 1) : 0); 2078 if (Offset >= MinOff && Offset <= MaxOff) { 2079 if (EmittableOffset) 2080 *EmittableOffset = Offset; 2081 Offset = 0; 2082 } else { 2083 int NewOff = Offset < 0 ? MinOff : MaxOff; 2084 if (EmittableOffset) 2085 *EmittableOffset = NewOff; 2086 Offset = (Offset - NewOff) * Scale; 2087 } 2088 if (OutUseUnscaledOp) 2089 *OutUseUnscaledOp = useUnscaledOp; 2090 if (OutUnscaledOp) 2091 *OutUnscaledOp = UnscaledOp; 2092 return AArch64FrameOffsetCanUpdate | 2093 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); 2094} 2095 2096bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, 2097 unsigned FrameReg, int &Offset, 2098 const AArch64InstrInfo *TII) { 2099 unsigned Opcode = MI.getOpcode(); 2100 unsigned ImmIdx = FrameRegIdx + 1; 2101 2102 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { 2103 Offset += MI.getOperand(ImmIdx).getImm(); 2104 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), 2105 MI.getOperand(0).getReg(), FrameReg, Offset, TII, 2106 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); 2107 MI.eraseFromParent(); 2108 Offset = 0; 2109 return true; 2110 } 2111 2112 int NewOffset; 2113 unsigned UnscaledOp; 2114 bool UseUnscaledOp; 2115 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, 2116 &UnscaledOp, &NewOffset); 2117 if (Status & AArch64FrameOffsetCanUpdate) { 2118 if (Status & AArch64FrameOffsetIsLegal) 2119 // Replace the FrameIndex with FrameReg. 2120 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); 2121 if (UseUnscaledOp) 2122 MI.setDesc(TII->get(UnscaledOp)); 2123 2124 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); 2125 return Offset == 0; 2126 } 2127 2128 return false; 2129} 2130 2131void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { 2132 NopInst.setOpcode(AArch64::HINT); 2133 NopInst.addOperand(MCOperand::CreateImm(0)); 2134} 2135