SIInstrInfo.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm) { }

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
         I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}
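
// Illustrative expansion of the sub-register loop above (a sketch; the
// register names are hypothetical): copying an SReg_64 pair into a VReg_64
// destination emits one V_MOV_B32_e32 per 32-bit half, e.g.
//
//   %VGPR2 = V_MOV_B32_e32 %SGPR0, implicit-def %VGPR2_VGPR3
//   %VGPR3 = V_MOV_B32_e32 %SGPR1
//
// Every move except the last also carries an implicit def of the full
// DestReg, which keeps the liveness of the wide register consistent while it
// is being assembled piece by piece.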

unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned SrcReg, bool isKill,
                                      int FrameIndex,
                                      const TargetRegisterClass *RC,
                                      const TargetRegisterInfo *TRI) const {
  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);
  unsigned KillFlag = isKill ? RegState::Kill : 0;
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
    unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());

    BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
            .addReg(SrcReg, KillFlag)
            .addImm(Lane);
    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
  } else if (RI.isSGPRClass(RC)) {
    // We are only allowed to create one new instruction when spilling
    // registers, so we need to use a pseudo instruction for vector
    // registers.
    //
    // Reserve a spot in the spill tracker for each sub-register of
    // the vector register.
    unsigned NumSubRegs = RC->getSize() / 4;
    unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
                                                        NumSubRegs);
    MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
                                    FirstLane);

    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_SAVE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
            .addReg(SrcReg)
            .addImm(FrameIndex);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}
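
// A sketch of what the SGPR_32 path above produces (the operands are
// hypothetical): spilling %SGPR5 to frame index 0, with lane 3 reserved in
// the tracker's LaneVGPR, emits
//
//   %VGPR0 = V_WRITELANE_B32 %SGPR5, 3
//
// i.e. the scalar value is parked in one lane of a VGPR instead of going to
// memory, and the (FrameIndex -> VGPR, lane) mapping is recorded so the
// reload can issue the matching V_READLANE_B32.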

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MI,
                                       unsigned DestReg, int FrameIndex,
                                       const TargetRegisterClass *RC,
                                       const TargetRegisterInfo *TRI) const {
  SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);
  if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
    SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);
    assert(Spill.VGPR);
    BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
            .addReg(Spill.VGPR)
            .addImm(Spill.Lane);
    insertNOPs(MI, 3);
  } else if (RI.isSGPRClass(RC)) {
    unsigned Opcode;
    switch (RC->getSize() * 8) {
    case 64:  Opcode = AMDGPU::SI_SPILL_S64_RESTORE;  break;
    case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
    case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
    case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
    default: llvm_unreachable("Cannot spill register class");
    }

    SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    BuildMI(MBB, MI, DL, get(Opcode), DestReg)
            .addReg(Spill.VGPR)
            .addImm(FrameIndex);
    insertNOPs(MI, 3);
  } else {
    llvm_unreachable("VGPR spilling not supported");
  }
}

static unsigned getNumSubRegsForSpillOp(unsigned Op) {

  switch (Op) {
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
    return 16;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
    return 8;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
    return 4;
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
    return 2;
  default: llvm_unreachable("Invalid spill opcode");
  }
}

void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  while (Count > 0) {
    int Arg;
    if (Count >= 8)
      Arg = 7;
    else
      Arg = Count - 1;
    Count -= 8;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
            .addImm(Arg);
  }
}
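
// Worked example for insertNOPs: the loop treats S_NOP's immediate as
// "number of nops minus one" (hence the cap at 7, i.e. 8 nops per
// instruction), so a call such as insertNOPs(MI, 10) emits two instructions:
//
//   S_NOP 7   // 8 nops
//   S_NOP 1   // 2 nops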

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  SIMachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MBB.findDebugLoc(MI);
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    unsigned FrameIndex = MI->getOperand(2).getImm();

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
              MI->getOperand(0).getReg())
              .addReg(SubReg)
              .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE: {
    unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());

    for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
      SIMachineFunctionInfo::SpilledReg Spill;
      unsigned FrameIndex = MI->getOperand(2).getImm();
      unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
                                            &AMDGPU::SGPR_32RegClass, i);
      Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
              .addReg(MI->getOperand(1).getReg())
              .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}
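
// For instance (hypothetical registers, with L the lane recorded for the
// frame index), a pseudo
//
//   SI_SPILL_S128_SAVE %VGPR0, %SGPR4_SGPR5_SGPR6_SGPR7, <fi#1>
//
// expands under the SAVE case above into one lane write per sub-register,
// at consecutive lanes:
//
//   %VGPR0 = V_WRITELANE_B32 %SGPR4, L+0
//   %VGPR0 = V_WRITELANE_B32 %SGPR5, L+1
//   %VGPR0 = V_WRITELANE_B32 %SGPR6, L+2
//   %VGPR0 = V_WRITELANE_B32 %SGPR7, L+3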

MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return nullptr;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
    return nullptr;

  if (!MI->getOperand(2).isReg()) {
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
        (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return nullptr;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return nullptr;

    unsigned Reg = MI->getOperand(1).getReg();
    unsigned SubReg = MI->getOperand(1).getSubReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
    MI->getOperand(2).setSubReg(SubReg);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}
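
// Sketch of the immediate case above (hypothetical operands): for a VOP2
// instruction such as
//
//   %VGPR2 = V_ADD_I32_e32 %VGPR0, 42
//
// commuting swaps the two sources in place, giving
//
//   %VGPR2 = V_ADD_I32_e32 42, %VGPR0
//
// and commuteOpcode() then switches to the reversed-operand (_REV) form of
// the opcode when the tablegen tables define one; for an opcode with no
// commute mapping, the opcode is left unchanged.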

MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg).addReg(SrcReg);
}

bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

bool
SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
                                         AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
    return MI->getOperand(1).isImm();
  }
}

namespace llvm {
namespace AMDGPU {
// Helper function generated by tablegen. We are wrapping this with
// an SIInstrInfo function that returns bool rather than int.
int isDS(uint16_t Opcode);
}
}

bool SIInstrInfo::isDS(uint16_t Opcode) const {
  return ::AMDGPU::isDS(Opcode) != -1;
}

int SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

int SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
  int32_t Val = Imm.getSExtValue();
  if (Val >= -16 && Val <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  return (APInt::floatToBits(0.0f) == Imm) ||
         (APInt::floatToBits(1.0f) == Imm) ||
         (APInt::floatToBits(-1.0f) == Imm) ||
         (APInt::floatToBits(0.5f) == Imm) ||
         (APInt::floatToBits(-0.5f) == Imm) ||
         (APInt::floatToBits(2.0f) == Imm) ||
         (APInt::floatToBits(-2.0f) == Imm) ||
         (APInt::floatToBits(4.0f) == Imm) ||
         (APInt::floatToBits(-4.0f) == Imm);
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if (MO.isImm())
    return isInlineConstant(APInt(32, MO.getImm(), true));

  if (MO.isFPImm()) {
    APFloat FpImm = MO.getFPImm()->getValueAPF();
    return isInlineConstant(FpImm.bitcastToAPInt());
  }

  return false;
}

bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}
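
// Consequences of the two predicates above, for concreteness: the integer 64
// is an inline constant (free to encode in the instruction), while 65 is a
// literal constant and takes an extra 32-bit literal dword in the encoding;
// likewise the bit pattern 0x3f800000 (1.0f) is inline, while an arbitrary
// float such as 3.14f is a literal.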

bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Make sure the number of operands is correct.
  const MCInstrDesc &Desc = get(Opcode);
  if (!Desc.isVariadic() &&
      Desc.getNumOperands() != MI->getNumExplicitOperands()) {
    ErrInfo = "Instruction has wrong number of operands.";
    return false;
  }

  // Make sure the register classes are correct
  for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
    switch (Desc.OpInfo[i].OperandType) {
    case MCOI::OPERAND_REGISTER:
      break;
    case MCOI::OPERAND_IMMEDIATE:
      if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
        ErrInfo = "Expected immediate, but got non-immediate";
        return false;
      }
      // Fall-through
    default:
      continue;
    }

    if (!MI->getOperand(i).isReg())
      continue;

    int RegClass = Desc.OpInfo[i].RegClass;
    if (RegClass != -1) {
      unsigned Reg = MI->getOperand(i).getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg))
        continue;

      const TargetRegisterClass *RC = RI.getRegClass(RegClass);
      if (!RC->contains(Reg)) {
        ErrInfo = "Operand has incorrect register class.";
        return false;
      }
    }
  }

  // Verify VOP*
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
             (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
              AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }
  return true;
}
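
// Example of the constant-bus rule enforced above (hypothetical instruction):
//
//   %VGPR0 = V_ADD_F32_e32 %SGPR0, %SGPR1
//
// reads two different SGPRs, i.e. the constant bus twice, so it fails
// verification, whereas using %SGPR0 for both sources counts only once
// (note how SGPRUsed de-duplicates repeated reads of the same register).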

unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::S_MOV_B32:
    return MI.getOperand(1).isReg() ?
           AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
  case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
  }
}

bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
  case AMDGPU::PHI:
  case AMDGPU::INSERT_SUBREG:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
  unsigned Reg = MRI.createVirtualRegister(VRC);
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC)
                                         const {
  assert(SuperReg.isReg());

  unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
  unsigned SubReg = MRI.createVirtualRegister(SubRC);

  // Just in case the super register is itself a sub-register, copy it to a new
  // value so we don't need to worry about merging its subreg index with the
  // SubIdx passed to this function. The register coalescer should be able to
  // eliminate this extra copy.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          NewSuperReg)
          .addOperand(SuperReg);

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
          SubReg)
          .addReg(NewSuperReg, 0, SubIdx);
  return SubReg;
}
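
// In MIR terms, buildExtractSubReg(..., Op, SuperRC, AMDGPU::sub1, SubRC)
// emits two copies (virtual register numbers are illustrative):
//
//   %vreg10 = COPY Op              ; fresh copy in SuperRC
//   %vreg11 = COPY %vreg10:sub1    ; the requested 32-bit half, in SubRC
//
// and returns %vreg11.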

MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
  MachineBasicBlock::iterator MII,
  MachineRegisterInfo &MRI,
  MachineOperand &Op,
  const TargetRegisterClass *SuperRC,
  unsigned SubIdx,
  const TargetRegisterClass *SubRC) const {
  if (Op.isImm()) {
    // XXX - Is there a better way to do this?
    if (SubIdx == AMDGPU::sub0)
      return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
    if (SubIdx == AMDGPU::sub1)
      return MachineOperand::CreateImm(Op.getImm() >> 32);

    llvm_unreachable("Unhandled register index for immediate");
  }

  unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
                                       SubIdx, SubRC);
  return MachineOperand::CreateReg(SubReg, false);
}

unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
                                    MachineBasicBlock::iterator MI,
                                    MachineRegisterInfo &MRI,
                                    const TargetRegisterClass *RC,
                                    const MachineOperand &Op) const {
  MachineBasicBlock *MBB = MI->getParent();
  DebugLoc DL = MI->getDebugLoc();
  unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
  unsigned Dst = MRI.createVirtualRegister(RC);

  MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             LoDst)
                     .addImm(Op.getImm() & 0xFFFFFFFF);
  MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
                             HiDst)
                     .addImm(Op.getImm() >> 32);

  BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
          .addReg(LoDst)
          .addImm(AMDGPU::sub0)
          .addReg(HiDst)
          .addImm(AMDGPU::sub1);

  Worklist.push_back(Lo);
  Worklist.push_back(Hi);

  return Dst;
}
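
// Concretely, split64BitImm on the immediate 0x0000000100000002 produces
// (virtual register numbers are illustrative):
//
//   %vreg1 = S_MOV_B32 2                              ; low 32 bits
//   %vreg2 = S_MOV_B32 1                              ; high 32 bits
//   %vreg3 = REG_SEQUENCE %vreg1, sub0, %vreg2, sub1  ; reassembled 64 bits
//
// The two S_MOV_B32s are pushed on the caller's worklist so they can in turn
// be moved to the VALU if their results end up needed in VGPRs.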

void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src0 = MI->getOperand(Src0Idx);
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // If the instruction implicitly reads VCC, we can't have any SGPR
    // operands, so move any that we find to VGPRs.
    bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
    if (ReadsVCC && Src0.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
      legalizeOpWithMove(MI, Src0Idx);
      return;
    }

    if (ReadsVCC && Src1.isReg() &&
        RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
      legalizeOpWithMove(MI, Src1Idx);
      return;
    }

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }
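
  // To illustrate the VOP2 rules above (hypothetical operands): given
  //
  //   %vreg2 = V_ADD_F32_e32 %vreg0, %vreg1
  //
  // where %vreg1 lives in an SGPR class, the preferred fix is to commute so
  // the SGPR becomes src0 (the one source position that may read the
  // constant bus); only if the instruction cannot be commuted is %vreg1
  // copied into a fresh VGPR with legalizeOpWithMove.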

  // XXX - Do any VOP3 instructions read VCC?
  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE and PHI
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
      MI->getOpcode() == AMDGPU::PHI) {
    const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
          MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be;
    // otherwise we will create illegal VGPR->SGPR copies when legalizing
    // them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      MachineBasicBlock *InsertBB;
      MachineBasicBlock::iterator Insert;
      if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
        InsertBB = MI->getParent();
        Insert = MI;
      } else {
        // MI is a PHI instruction.
        InsertBB = MI->getOperand(i + 1).getMBB();
        Insert = InsertBB->getFirstTerminator();
      }
      BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }

  // Legalize INSERT_SUBREG
  // src0 must have the same register class as dst
  if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
    unsigned Dst = MI->getOperand(0).getReg();
    unsigned Src0 = MI->getOperand(1).getReg();
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
    const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
    if (DstRC != Src0RC) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
              .addReg(Src0);
      MI->getOperand(1).setReg(NewSrc0);
    }
    return;
  }

  // Legalize MUBUF* instructions
  // FIXME: If we start using the non-addr64 instructions for compute, we
  // may need to legalize them here.

  int SRsrcIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::srsrc);
  int VAddrIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr);
  if (SRsrcIdx != -1 && VAddrIdx != -1) {
    const TargetRegisterClass *VAddrRC =
        RI.getRegClass(get(MI->getOpcode()).OpInfo[VAddrIdx].RegClass);

    if (VAddrRC->getSize() == 8 &&
        MRI.getRegClass(MI->getOperand(SRsrcIdx).getReg()) != VAddrRC) {
      // We have a MUBUF instruction that uses a 64-bit vaddr register and
      // srsrc has the incorrect register class. In order to fix this, we
      // need to extract the pointer from the resource descriptor (srsrc),
      // add it to the value of vaddr, then store the result in the vaddr
      // operand. Then, we need to set the pointer field of the resource
      // descriptor to zero.

      MachineBasicBlock &MBB = *MI->getParent();
      MachineOperand &SRsrcOp = MI->getOperand(SRsrcIdx);
      MachineOperand &VAddrOp = MI->getOperand(VAddrIdx);
      unsigned SRsrcPtrLo, SRsrcPtrHi, VAddrLo, VAddrHi;
      unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
      unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
      unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
      unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);

      // SRsrcPtrLo = srsrc:sub0
      SRsrcPtrLo = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // SRsrcPtrHi = srsrc:sub1
      SRsrcPtrHi = buildExtractSubReg(MI, MRI, SRsrcOp,
          &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // VAddrLo = vaddr:sub0
      VAddrLo = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub0, &AMDGPU::VReg_32RegClass);

      // VAddrHi = vaddr:sub1
      VAddrHi = buildExtractSubReg(MI, MRI, VAddrOp,
          &AMDGPU::VReg_64RegClass, AMDGPU::sub1, &AMDGPU::VReg_32RegClass);

      // NewVaddrLo = SRsrcPtrLo + VAddrLo
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
              NewVAddrLo)
              .addReg(SRsrcPtrLo)
              .addReg(VAddrLo)
              .addReg(AMDGPU::VCC, RegState::Define | RegState::Implicit);

      // NewVaddrHi = SRsrcPtrHi + VAddrHi
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
              NewVAddrHi)
              .addReg(SRsrcPtrHi)
              .addReg(VAddrHi)
              .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
              .addReg(AMDGPU::VCC, RegState::Implicit);

      // NewVaddr = {NewVaddrHi, NewVaddrLo}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewVAddr)
              .addReg(NewVAddrLo)
              .addImm(AMDGPU::sub0)
              .addReg(NewVAddrHi)
              .addImm(AMDGPU::sub1);

      // Zero64 = 0
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
              Zero64)
              .addImm(0);

      // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatLo)
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);

      // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
              SRsrcFormatHi)
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);

      // NewSRsrc = {Zero64, SRsrcFormat}
      BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
              NewSRsrc)
              .addReg(Zero64)
              .addImm(AMDGPU::sub0_sub1)
              .addReg(SRsrcFormatLo)
              .addImm(AMDGPU::sub2)
              .addReg(SRsrcFormatHi)
              .addImm(AMDGPU::sub3);

      // Update the instruction to use NewVaddr
      MI->getOperand(VAddrIdx).setReg(NewVAddr);
      // Update the instruction to use NewSRsrc
      MI->getOperand(SRsrcIdx).setReg(NewSRsrc);
    }
  }
}
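
// Net effect of the MUBUF fix-up above: after legalization the instruction
// addresses memory through vaddr' = old resource pointer + old vaddr, while
// the new 128-bit resource descriptor holds a null pointer in its first two
// dwords with only the RSRC_DATA_FORMAT bits filled in. The access therefore
// computes the same address, but srsrc now has the register class the addr64
// form of the instruction expects.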

void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
  MachineBasicBlock *MBB = MI->getParent();
  switch (MI->getOpcode()) {
  case AMDGPU::S_LOAD_DWORD_IMM:
  case AMDGPU::S_LOAD_DWORD_SGPR:
  case AMDGPU::S_LOAD_DWORDX2_IMM:
  case AMDGPU::S_LOAD_DWORDX2_SGPR:
  case AMDGPU::S_LOAD_DWORDX4_IMM:
  case AMDGPU::S_LOAD_DWORDX4_SGPR:
    unsigned NewOpcode = getVALUOp(*MI);
    unsigned RegOffset;
    unsigned ImmOffset;

    if (MI->getOperand(2).isReg()) {
      RegOffset = MI->getOperand(2).getReg();
      ImmOffset = 0;
    } else {
      assert(MI->getOperand(2).isImm());
      // SMRD instructions take a dword offset and MUBUF instructions
      // take a byte offset.
      ImmOffset = MI->getOperand(2).getImm() << 2;
      RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
      if (isUInt<12>(ImmOffset)) {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(0);
      } else {
        BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
                RegOffset)
                .addImm(ImmOffset);
        ImmOffset = 0;
      }
    }

    unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
    unsigned DWord0 = RegOffset;
    unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
    unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
            .addImm(0);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
    BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
            .addReg(DWord0)
            .addImm(AMDGPU::sub0)
            .addReg(DWord1)
            .addImm(AMDGPU::sub1)
            .addReg(DWord2)
            .addImm(AMDGPU::sub2)
            .addReg(DWord3)
            .addImm(AMDGPU::sub3);
    MI->setDesc(get(NewOpcode));
    if (MI->getOperand(2).isReg()) {
      MI->getOperand(2).setReg(MI->getOperand(1).getReg());
    } else {
      MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
    }
    MI->getOperand(1).setReg(SRsrc);
    MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
  }
}
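
// Offset handling in the conversion above, by example: an S_LOAD_DWORD_IMM
// with dword offset 4 becomes a BUFFER_LOAD_DWORD_ADDR64 with byte offset
// 16 (4 << 2), which still fits the MUBUF 12-bit immediate offset field, so
// RegOffset is simply set to 0. A dword offset of 2048 would scale to 8192
// bytes, overflow 12 bits, and instead be materialized into RegOffset with
// S_MOV_B32 while the immediate offset is zeroed.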

void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    MachineBasicBlock *MBB = Inst->getParent();
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

    unsigned Opcode = Inst->getOpcode();
    unsigned NewOpcode = getVALUOp(*Inst);

    // Handle some special cases
    switch (Opcode) {
    default:
      if (isSMRD(Inst->getOpcode())) {
        moveSMRDToVALU(Inst, MRI);
      }
      break;
    case AMDGPU::S_MOV_B64: {
      DebugLoc DL = Inst->getDebugLoc();

      // If the source operand is a register we can replace this with a
      // copy.
      if (Inst->getOperand(1).isReg()) {
        MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
                             .addOperand(Inst->getOperand(0))
                             .addOperand(Inst->getOperand(1));
        Worklist.push_back(Copy);
      } else {
        // Otherwise, we need to split this into two movs, because there is
        // no 64-bit VALU move instruction.
        unsigned Reg = Inst->getOperand(0).getReg();
        unsigned Dst = split64BitImm(Worklist,
                                     Inst,
                                     MRI,
                                     MRI.getRegClass(Reg),
                                     Inst->getOperand(1));
        MRI.replaceRegWith(Reg, Dst);
      }
      Inst->eraseFromParent();
      continue;
    }
    case AMDGPU::S_AND_B64:
      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_OR_B64:
      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_XOR_B64:
      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_NOT_B64:
      splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
      Inst->eraseFromParent();
      continue;

    case AMDGPU::S_BFE_U64:
    case AMDGPU::S_BFE_I64:
    case AMDGPU::S_BFM_B64:
      llvm_unreachable("Moving this op to VALU not implemented");
    }

    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
      // We cannot move this instruction to the VALU, so we should try to
      // legalize its operands instead.
      legalizeOperands(Inst);
      continue;
    }

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC. Vector instructions can't read from it, and
    // we're just about to add the implicit use / defs of VCC, and we don't want
    // both.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
      // We are converting these to a BFE, so we need to add the missing
      // operands for the size and offset.
      unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Size));

      // XXX - Other pointless operands. There are 4, but it seems you only need
      // 3 to not hit an assertion later in MCInstLower.
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    }
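
    // Rationale for the BFE rewrite above: sign-extending the low 8 (or 16)
    // bits of a value is exactly a signed bitfield extract starting at bit
    // offset 0 with width 8 (or 16), so S_SEXT_I32_I8 %x maps to
    // V_BFE_I32 %x, 0, 8 once the extra immediate operands are appended.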

    addDescImplicitUseDef(NewDesc, Inst);

    if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
      const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
      // If we need to move this to VGPRs, we need to unpack the second operand
      // back into the 2 separate ones for bit offset and width.
      assert(OffsetWidthOp.isImm() &&
             "Scalar BFE is only implemented for constant width and offset");
      uint32_t Imm = OffsetWidthOp.getImm();

      uint32_t Offset = Imm & 0x3f;               // Extract bits [5:0].
      uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].

      Inst->RemoveOperand(2); // Remove old immediate.
      Inst->addOperand(Inst->getOperand(1));
      Inst->getOperand(1).ChangeToImmediate(0);
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(Offset));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(BitWidth));
      Inst->addOperand(MachineOperand::CreateImm(0));
      Inst->addOperand(MachineOperand::CreateImm(0));
    }

    // Update the destination register class.

    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Opcode) {
    // For target instructions, getOpRegClass just returns the virtual
    // register class associated with the operand, so we need to find an
    // equivalent VGPR register class in order to move the instruction to the
    // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
    case AMDGPU::INSERT_SUBREG:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Legalize the operands
    legalizeOperands(Inst);

    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
         E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I->getParent();
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}
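
// moveToVALU runs as a worklist fixpoint: converting one instruction moves
// its result into a VGPR, so every user that cannot read a VGPR (per
// canReadVGPR) is queued and converted in turn. For example, moving
// S_AND_B64 first splits it into two S_AND_B32 halves, and those halves are
// then popped off the worklist and become V_AND_B32_e32 via getVALUOp.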

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}

void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
                                     MachineInstr *Inst,
                                     unsigned Opcode) const {
  MachineBasicBlock &MBB = *Inst->getParent();
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  MachineOperand &Dest = Inst->getOperand(0);
  MachineOperand &Src0 = Inst->getOperand(1);
  MachineOperand &Src1 = Inst->getOperand(2);
  DebugLoc DL = Inst->getDebugLoc();

  MachineBasicBlock::iterator MII = Inst;

  const MCInstrDesc &InstDesc = get(Opcode);
  const TargetRegisterClass *Src0RC = Src0.isReg() ?
                                      MRI.getRegClass(Src0.getReg()) :
                                      &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
  const TargetRegisterClass *Src1RC = Src1.isReg() ?
                                      MRI.getRegClass(Src1.getReg()) :
                                      &AMDGPU::SGPR_32RegClass;

  const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);

  MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub0, Src0SubRC);
  MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub0, Src1SubRC);

  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
  const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);

  unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
  MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
                         .addOperand(SrcReg0Sub0)
                         .addOperand(SrcReg1Sub0);

  MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
                                                       AMDGPU::sub1, Src0SubRC);
  MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
                                                       AMDGPU::sub1, Src1SubRC);

  unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
  MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
                         .addOperand(SrcReg0Sub1)
                         .addOperand(SrcReg1Sub1);

  unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
  BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
          .addReg(DestSub0)
          .addImm(AMDGPU::sub0)
          .addReg(DestSub1)
          .addImm(AMDGPU::sub1);

  MRI.replaceRegWith(Dest.getReg(), FullDestReg);

  // Try to legalize the operands in case we need to swap the order to keep it
  // valid.
  Worklist.push_back(LoHalf);
  Worklist.push_back(HiHalf);
}
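
// Shape of the expansion above, e.g. for splitScalar64BitOp with S_OR_B64
// mapped to S_OR_B32 (register numbers illustrative, %a and %b the 64-bit
// sources):
//
//   %lo  = S_OR_B32 %a:sub0, %b:sub0
//   %hi  = S_OR_B32 %a:sub1, %b:sub1
//   %dst = REG_SEQUENCE %lo, sub0, %hi, sub1
//
// This is only valid for bitwise ops, where the two halves are independent;
// a carry-propagating op like a 64-bit add would need a different expansion.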

void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
                                        MachineInstr *Inst) const {
  // Add the implicit and explicit register definitions.
  if (NewDesc.ImplicitUses) {
    for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitUses[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
    }
  }

  if (NewDesc.ImplicitDefs) {
    for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
      unsigned Reg = NewDesc.ImplicitDefs[i];
      Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }
}

MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}

MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);
}

void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                           const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  if (End == -1)
    return;

  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
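
// Why the loops above start earlier for the wider classes: an N-dword
// register tuple occupies N consecutive 32-bit VGPRs, so any tuple whose
// first register lies within (N - 1) slots before Begin still overlaps the
// reserved [Begin, End] window and must also be reserved. Hence VReg_64
// starts at Begin - 1, VReg_128 at Begin - 3, and VReg_512 at Begin - 15.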