SIInstrInfo.cpp revision 836c5133c66edecedeaa79448964b4c103f99271
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief SI Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//


#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm)
    { }

const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
  return RI;
}

//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
//===----------------------------------------------------------------------===//

// Emit a physical-register copy from SrcReg to DestReg.  Single 32/64-bit
// scalar and 32-bit vector copies are emitted as one move; copies of wider
// register classes are decomposed into a sequence of 32-bit sub-register
// moves.  Copies into M0 that are provably redundant are elided entirely.
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MI, DebugLoc DL,
                         unsigned DestReg, unsigned SrcReg,
                         bool KillSrc) const {

  // If we are trying to copy to or from SCC, there is a bug somewhere else in
  // the backend. While it may be theoretically possible to do this, it should
  // never be necessary.
  assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);

  // Zero-terminated sub-register index lists.  A wide copy walks one of
  // these lists and emits one 32-bit move per entry.
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
  };

  static const int16_t Sub0_7[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
  };

  static const int16_t Sub0_3[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
  };

  static const int16_t Sub0_2[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
  };

  static const int16_t Sub0_1[] = {
    AMDGPU::sub0, AMDGPU::sub1, 0
  };

  unsigned Opcode;
  const int16_t *SubIndices;

  if (AMDGPU::M0 == DestReg) {
    // Check if M0 isn't already set to this value.  Scan backwards from the
    // insertion point for the nearest instruction that defines M0: if it is a
    // plain COPY / S_MOV_B32 that reads SrcReg, M0 already holds the value
    // and the copy can be dropped.  Any other kind of M0 def (or a mov from a
    // different source) ends the scan and the copy is emitted normally below.
    for (MachineBasicBlock::reverse_iterator E = MBB.rend(),
         I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) {

      if (!I->definesRegister(AMDGPU::M0))
        continue;

      unsigned Opc = I->getOpcode();
      if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32)
        break;

      if (!I->readsRegister(SrcReg))
        break;

      // The copy isn't necessary
      return;
    }
  }

  // Select the move opcode and (for wide classes) the sub-register list based
  // on the destination register class.  SGPR destinations must be copied from
  // SGPRs; VGPR destinations may be copied from either VGPRs or SGPRs of the
  // matching width (V_MOV_B32 can read a scalar source).
  if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::S_MOV_B32;
    SubIndices = Sub0_15;

  } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
           AMDGPU::SReg_32RegClass.contains(SrcReg));
    BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    return;

  } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
           AMDGPU::SReg_64RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_1;

  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_2;

  } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
           AMDGPU::SReg_128RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_3;

  } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
           AMDGPU::SReg_256RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_7;

  } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
    assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
           AMDGPU::SReg_512RegClass.contains(SrcReg));
    Opcode = AMDGPU::V_MOV_B32_e32;
    SubIndices = Sub0_15;

  } else {
    llvm_unreachable("Can't copy register!");
  }

  // Emit one 32-bit move per sub-register.  Every move except the last also
  // implicitly defines the full DestReg so the whole sequence is treated as
  // one live-range definition of the wide register.
  while (unsigned SubIdx = *SubIndices++) {
    MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
      get(Opcode), RI.getSubReg(DestReg, SubIdx));

    Builder.addReg(RI.getSubReg(SrcReg, SubIdx),
                   getKillRegState(KillSrc));

    if (*SubIndices)
      Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
  }
}

// Return the opcode to use after commuting the operands of Opcode, looked up
// through the tablegen'd commute tables.  Returns Opcode unchanged when the
// instruction is its own commuted form (no table entry in either direction).
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
  int NewOpc;

  // Try to map original to commuted opcode
  if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
    return NewOpc;

  // Try to map commuted to original opcode
  if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
    return NewOpc;

  return Opcode;
}

// Commute operands 1 and 2 of MI, if legal for this target.  Unlike the
// generic TargetInstrInfo version, this also handles an immediate in operand
// 2 by swapping it with the register in operand 1 in place, and swaps to the
// commuted opcode via commuteOpcode().  Returns 0 (null) when the
// instruction cannot be commuted.
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
                                              bool NewMI) const {

  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
    return 0;

  // Cannot commute VOP2 if src0 is SGPR.
  if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
      RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
   return 0;

  if (!MI->getOperand(2).isReg()) {
    // Operand 2 is an immediate: the swap must be done in place (the generic
    // path only handles register-register commutes).  NewMI (creating a new
    // instruction) is not supported on this path.
    // XXX: Commute instructions with FPImm operands
    if (NewMI || MI->getOperand(2).isFPImm() ||
       (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
      return 0;
    }

    // XXX: Commute VOP3 instructions with abs and neg set.
    if (isVOP3(MI->getOpcode()) &&
        (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::abs)).getImm() ||
         MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                        AMDGPU::OpName::neg)).getImm()))
      return 0;

    unsigned Reg = MI->getOperand(1).getReg();
    MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
    MI->getOperand(2).ChangeToRegister(Reg, false);
  } else {
    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
  }

  if (MI)
    MI->setDesc(get(commuteOpcode(MI->getOpcode())));

  return MI;
}

// Build a plain VALU register move (V_MOV_B32_e32) from SrcReg to DstReg at
// the given insertion point.
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         unsigned SrcReg) const {
  return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
                 DstReg) .addReg(SrcReg);
}

// True if Opcode is one of the SI move instructions (scalar or vector).
bool SIInstrInfo::isMov(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
    return true;
  }
}

// Defs in every register class may be moved, except the EXEC-register class.
bool
SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  return RC != &AMDGPU::EXECRegRegClass;
}

// The is* predicates below classify an opcode by the instruction-format flag
// bits recorded in its tablegen'd MCInstrDesc::TSFlags.

int SIInstrInfo::isMIMG(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}

int SIInstrInfo::isSMRD(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}

bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}

bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}

bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}

bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
  return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}

bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
}

// True if MO is a constant that can be encoded inline in the instruction:
// integers in [-16, 64], or one of the specific FP values listed below.
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
  if(MO.isImm()) {
    return MO.getImm() >= -16 && MO.getImm() <= 64;
  }
  if (MO.isFPImm()) {
    return MO.getFPImm()->isExactlyValue(0.0) ||
           MO.getFPImm()->isExactlyValue(0.5) ||
           MO.getFPImm()->isExactlyValue(-0.5) ||
           MO.getFPImm()->isExactlyValue(1.0) ||
           MO.getFPImm()->isExactlyValue(-1.0) ||
           MO.getFPImm()->isExactlyValue(2.0) ||
           MO.getFPImm()->isExactlyValue(-2.0) ||
           MO.getFPImm()->isExactlyValue(4.0) ||
           MO.getFPImm()->isExactlyValue(-4.0);
  }
  return false;
}

// True if MO is a constant that must be encoded as a separate literal dword
// (i.e. any immediate that is not an inline constant).
bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
  return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}

// Machine verifier hook: check SI-specific encoding constraints.  On failure
// sets ErrInfo to a diagnostic string and returns false.
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
                                    StringRef &ErrInfo) const {
  uint16_t Opcode = MI->getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  // Verify VOP*: at most one distinct value may be read over the constant
  // bus (SGPRs, M0, VCC, non-implicit EXEC, and literal constants all use it).
  if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
    unsigned ConstantBusCount = 0;
    unsigned SGPRUsed = AMDGPU::NoRegister;
    for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isUse() &&
          !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {

        // EXEC register uses the constant bus.
        if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
          ++ConstantBusCount;

        // SGPRs use the constant bus; the same SGPR read more than once only
        // counts as a single use (tracked via SGPRUsed).
        if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
            (!MO.isImplicit() &&
            (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
            AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
          if (SGPRUsed != MO.getReg()) {
            ++ConstantBusCount;
            SGPRUsed = MO.getReg();
          }
        }
      }
      // Literal constants use the constant bus.
      if (isLiteralConstant(MO))
        ++ConstantBusCount;
    }
    if (ConstantBusCount > 1) {
      ErrInfo = "VOP* instruction uses the constant bus more than once";
      return false;
    }
  }

  // Verify SRC1 for VOP2 and VOPC
  if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
    const MachineOperand &Src1 = MI->getOperand(Src1Idx);
    if (Src1.isImm() || Src1.isFPImm()) {
      ErrInfo = "VOP[2C] src1 cannot be an immediate.";
      return false;
    }
  }

  // Verify VOP3: no source operand may be a literal constant.
  if (isVOP3(Opcode)) {
    if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
      ErrInfo = "VOP3 src0 cannot be a literal constant.";
      return false;
    }
    if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
      ErrInfo = "VOP3 src1 cannot be a literal constant.";
      return false;
    }
    if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
      ErrInfo = "VOP3 src2 cannot be a literal constant.";
      return false;
    }
  }
  return true;
}

// Map a SALU opcode to its VALU equivalent.  Generic target opcodes
// (COPY/PHI/REG_SEQUENCE) map to themselves; anything without a VALU
// counterpart maps to INSTRUCTION_LIST_END.
unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
  }
}

// True if this SALU instruction has a VALU equivalent (see getVALUOp).
bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
  return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
}

// Return the register class required for operand OpNo of MI.  For variadic
// instructions or operands with no class constraint in the MCInstrDesc, fall
// back to the class of the virtual register currently in the operand.
const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
                                                      unsigned OpNo) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  const MCInstrDesc &Desc = get(MI.getOpcode());
  if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
      Desc.OpInfo[OpNo].RegClass == -1)
    return MRI.getRegClass(MI.getOperand(OpNo).getReg());

  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return RI.getRegClass(RCID);
}

// True if operand OpNo of MI may read a VGPR.  COPY and REG_SEQUENCE take
// whatever class their destination (operand 0) has, so they are checked
// against the destination class instead of the source operand's class.
bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
  switch (MI.getOpcode()) {
  case AMDGPU::COPY:
  case AMDGPU::REG_SEQUENCE:
    return RI.hasVGPRs(getOpRegClass(MI, 0));
  default:
    return RI.hasVGPRs(getOpRegClass(MI, OpNo));
  }
}

// Legalize operand OpIdx by copying its current value (register or
// immediate) into a fresh virtual register of the class the instruction
// requires, then rewriting the operand to use that register.
void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
  MachineBasicBlock::iterator I = MI;
  MachineOperand &MO = MI->getOperand(OpIdx);
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
  const TargetRegisterClass *RC = RI.getRegClass(RCID);
  // Register operands become COPYs; immediates become S_MOV_B32 when the
  // target class is scalar, V_MOV_B32_e32 otherwise.
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (MO.isReg()) {
    Opcode = AMDGPU::COPY;
  } else if (RI.isSGPRClass(RC)) {
    Opcode = AMDGPU::S_MOV_B32;
  }

  unsigned Reg = MRI.createVirtualRegister(RI.getRegClass(RCID));
  BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
          Reg).addOperand(MO);
  MO.ChangeToRegister(Reg, false);
}

// Rewrite the operands of MI so they satisfy the encoding constraints of its
// opcode: VOP2 src1 must be a VGPR, VOP3 may read at most one SGPR, and all
// REG_SEQUENCE inputs must match the class of its result.
void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
  int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src1);
  int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                           AMDGPU::OpName::src2);

  // Legalize VOP2
  if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
    MachineOperand &Src1 = MI->getOperand(Src1Idx);

    // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
    // be the first operand, and there can only be one.
    if (Src1.isImm() || Src1.isFPImm() ||
        (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
      // Prefer commuting the operands over inserting a move.
      if (MI->isCommutable()) {
        if (commuteInstruction(MI))
          return;
      }
      legalizeOpWithMove(MI, Src1Idx);
    }
  }

  // Legalize VOP3
  if (isVOP3(MI->getOpcode())) {
    int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
    unsigned SGPRReg = AMDGPU::NoRegister;
    for (unsigned i = 0; i < 3; ++i) {
      int Idx = VOP3Idx[i];
      if (Idx == -1)
        continue;
      MachineOperand &MO = MI->getOperand(Idx);

      if (MO.isReg()) {
        if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
          continue; // VGPRs are legal

        assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");

        if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
          SGPRReg = MO.getReg();
          // We can use one SGPR in each VOP3 instruction.
          continue;
        }
      } else if (!isLiteralConstant(MO)) {
        // If it is not a register and not a literal constant, then it must be
        // an inline constant which is always legal.
        continue;
      }
      // If we make it this far, then the operand is not legal and we must
      // legalize it.
      legalizeOpWithMove(MI, Idx);
    }
  }

  // Legalize REG_SEQUENCE
  // The register class of the operands must be the same type as the register
  // class of the output.
  if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
    // Operands of REG_SEQUENCE come in (register, subreg-index) pairs, so
    // registers live at odd operand indices.  Record whether any input is a
    // VGPR (VRC) and remember the last SGPR class seen (SRC).
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      const TargetRegisterClass *OpRC =
              MRI.getRegClass(MI->getOperand(i).getReg());
      if (RI.hasVGPRs(OpRC)) {
        VRC = OpRC;
      } else {
        SRC = OpRC;
      }
    }

    // If any of the operands are VGPR registers, then they all must be
    // VGPRs, otherwise we will create illegal VGPR->SGPR copies when
    // legalizing them.
    if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
      if (!VRC) {
        assert(SRC);
        VRC = RI.getEquivalentVGPRClass(SRC);
      }
      RC = VRC;
    } else {
      RC = SRC;
    }

    // Update all the operands so they have the same type.
    for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
      if (!MI->getOperand(i).isReg() ||
          !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
        continue;
      unsigned DstReg = MRI.createVirtualRegister(RC);
      BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
              get(AMDGPU::COPY), DstReg)
              .addOperand(MI->getOperand(i));
      MI->getOperand(i).setReg(DstReg);
    }
  }
}

// Replace TopInst with its VALU equivalent, then transitively move any user
// that cannot read the now-VGPR result to the VALU as well (worklist
// algorithm).
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
  SmallVector<MachineInstr *, 128> Worklist;
  Worklist.push_back(&TopInst);

  while (!Worklist.empty()) {
    MachineInstr *Inst = Worklist.pop_back_val();
    unsigned NewOpcode = getVALUOp(*Inst);
    if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
      continue;

    MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();

    // Use the new VALU Opcode.
    const MCInstrDesc &NewDesc = get(NewOpcode);
    Inst->setDesc(NewDesc);

    // Remove any references to SCC: vector instructions can't read from it,
    // and we're just about to add the implicit use / defs of VCC, and we
    // don't want both.  Iterate backwards so RemoveOperand doesn't shift the
    // indices of operands not yet visited.
    for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
      MachineOperand &Op = Inst->getOperand(i);
      if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
        Inst->RemoveOperand(i);
    }

    // Add the implicit and explicit register definitions.
    if (NewDesc.ImplicitUses) {
      for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitUses[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
      }
    }

    if (NewDesc.ImplicitDefs) {
      for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
        unsigned Reg = NewDesc.ImplicitDefs[i];
        Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }

    legalizeOperands(Inst);

    // Update the destination register class.
    const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);

    switch (Inst->getOpcode()) {
    // For target instructions, getOpRegClass just returns the virtual
    // register class associated with the operand, so we need to find an
    // equivalent VGPR register class in order to move the instruction to the
    // VALU.
    case AMDGPU::COPY:
    case AMDGPU::PHI:
    case AMDGPU::REG_SEQUENCE:
      if (RI.hasVGPRs(NewDstRC))
        continue;
      NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
      if (!NewDstRC)
        continue;
      break;
    default:
      break;
    }

    unsigned DstReg = Inst->getOperand(0).getReg();
    unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
    MRI.replaceRegWith(DstReg, NewDstReg);

    // Queue every user of the rewritten result that cannot read a VGPR; each
    // will be moved to the VALU on a later worklist iteration.
    for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
           E = MRI.use_end(); I != E; ++I) {
      MachineInstr &UseMI = *I;
      if (!canReadVGPR(UseMI, I.getOperandNo())) {
        Worklist.push_back(&UseMI);
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Indirect addressing callbacks
//===----------------------------------------------------------------------===//

// On SI the indirect address is just the register index; only channel 0 is
// meaningful.
unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                               unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::VReg_32RegClass;
}

// Emit a SI_INDIRECT_DST_V1 pseudo that stores ValueReg at
// base-of-indirect-area + OffsetReg.  The Address parameter is unused on SI
// (see calculateIndirectAddress).
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
          .addReg(IndirectBaseReg, RegState::Define)
          .addOperand(I->getOperand(0))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0)
          .addReg(ValueReg);
}

// Emit a SI_INDIRECT_SRC pseudo that loads from
// base-of-indirect-area + OffsetReg.  The Address parameter is unused on SI
// (see calculateIndirectAddress).
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
                                   MachineBasicBlock *MBB,
                                   MachineBasicBlock::iterator I,
                                   unsigned ValueReg,
                                   unsigned Address, unsigned OffsetReg) const {
  const DebugLoc &DL = MBB->findDebugLoc(I);
  unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
                                      getIndirectIndexBegin(*MBB->getParent()));

  return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
          .addOperand(I->getOperand(0))
          .addOperand(I->getOperand(1))
          .addReg(IndirectBaseReg)
          .addReg(OffsetReg)
          .addImm(0);

}

// Mark the VGPRs used for indirect addressing as reserved.  Besides the
// VReg_32 registers in [Begin, End], every wider register (64..512-bit) that
// overlaps that range is reserved too, which is why each loop below starts
// earlier by (tuple size - 1) indices.
void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                            const MachineFunction &MF) const {
  int End = getIndirectIndexEnd(MF);
  int Begin = getIndirectIndexBegin(MF);

  // No indirect addressing in this function; nothing to reserve.
  if (End == -1)
    return;


  for (int Index = Begin; Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));

  for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
    Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}