R600InstrInfo.cpp revision 19a99df130f5747da950faf4ca5170d71f05594c
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief R600 Implementation of TargetInstrInfo. 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600InstrInfo.h" 16#include "AMDGPU.h" 17#include "AMDGPUSubtarget.h" 18#include "AMDGPUTargetMachine.h" 19#include "R600Defines.h" 20#include "R600MachineFunctionInfo.h" 21#include "R600RegisterInfo.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25 26#define GET_INSTRINFO_CTOR 27#include "AMDGPUGenDFAPacketizer.inc" 28 29using namespace llvm; 30 31R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 32 : AMDGPUInstrInfo(tm), 33 RI(tm), 34 ST(tm.getSubtarget<AMDGPUSubtarget>()) 35 { } 36 37const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { 38 return RI; 39} 40 41bool R600InstrInfo::isTrig(const MachineInstr &MI) const { 42 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 43} 44 45bool R600InstrInfo::isVector(const MachineInstr &MI) const { 46 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 47} 48 49void 50R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 51 MachineBasicBlock::iterator MI, DebugLoc DL, 52 unsigned DestReg, unsigned SrcReg, 53 bool KillSrc) const { 54 unsigned VectorComponents = 0; 55 if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && 56 AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 57 VectorComponents = 4; 58 } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && 59 AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { 60 VectorComponents = 2; 61 } 62 63 if (VectorComponents > 0) { 64 for (unsigned I = 0; I < VectorComponents; I++) { 65 unsigned SubRegIndex = RI.getSubRegFromChannel(I); 66 buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 67 RI.getSubReg(DestReg, SubRegIndex), 68 RI.getSubReg(SrcReg, SubRegIndex)) 69 .addReg(DestReg, 70 RegState::Define | RegState::Implicit); 71 } 72 } else { 73 MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, 74 DestReg, SrcReg); 75 NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) 76 .setIsKill(KillSrc); 77 } 78} 79 80unsigned R600InstrInfo::getIEQOpcode() const { 81 return AMDGPU::SETE_INT; 82} 83 84bool R600InstrInfo::isMov(unsigned Opcode) const { 85 86 87 switch(Opcode) { 88 default: return false; 89 case AMDGPU::MOV: 90 case AMDGPU::MOV_IMM_F32: 91 case AMDGPU::MOV_IMM_I32: 92 return true; 93 } 94} 95 96// Some instructions act as place holders to emulate operations that the GPU 97// hardware does automatically. This function can be used to check if 98// an opcode falls into this category. 99bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const { 100 switch (Opcode) { 101 default: return false; 102 case AMDGPU::RETURN: 103 return true; 104 } 105} 106 107bool R600InstrInfo::isReductionOp(unsigned Opcode) const { 108 return false; 109} 110 111bool R600InstrInfo::isCubeOp(unsigned Opcode) const { 112 switch(Opcode) { 113 default: return false; 114 case AMDGPU::CUBE_r600_pseudo: 115 case AMDGPU::CUBE_r600_real: 116 case AMDGPU::CUBE_eg_pseudo: 117 case AMDGPU::CUBE_eg_real: 118 return true; 119 } 120} 121 122bool R600InstrInfo::isALUInstr(unsigned Opcode) const { 123 unsigned TargetFlags = get(Opcode).TSFlags; 124 125 return (TargetFlags & R600_InstFlag::ALU_INST); 126} 127 128bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { 129 unsigned TargetFlags = get(Opcode).TSFlags; 130 131 return ((TargetFlags & R600_InstFlag::OP1) | 132 (TargetFlags & R600_InstFlag::OP2) | 133 (TargetFlags & R600_InstFlag::OP3)); 134} 135 136bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { 137 unsigned TargetFlags = get(Opcode).TSFlags; 138 139 return ((TargetFlags & R600_InstFlag::LDS_1A) | 140 (TargetFlags & R600_InstFlag::LDS_1A1D) | 141 (TargetFlags & R600_InstFlag::LDS_1A2D)); 142} 143 144bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const { 145 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1; 146} 147 148bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { 149 return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; 150} 151 152bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const { 153 if (isALUInstr(MI->getOpcode())) 154 return true; 155 if (isVector(*MI) || isCubeOp(MI->getOpcode())) 156 return true; 157 switch (MI->getOpcode()) { 158 case AMDGPU::PRED_X: 159 case AMDGPU::INTERP_PAIR_XY: 160 case AMDGPU::INTERP_PAIR_ZW: 161 case AMDGPU::INTERP_VEC_LOAD: 162 case AMDGPU::COPY: 163 case AMDGPU::DOT_4: 164 return true; 165 default: 166 return false; 167 } 168} 169 170bool R600InstrInfo::isTransOnly(unsigned Opcode) const { 171 if (ST.hasCaymanISA()) 172 return false; 173 return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); 174} 175 176bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const { 177 return isTransOnly(MI->getOpcode()); 178} 179 180bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { 181 return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); 182} 183 184bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const { 185 return isVectorOnly(MI->getOpcode()); 186} 187 188bool R600InstrInfo::isExport(unsigned Opcode) const { 189 return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); 190} 191 192bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { 193 return ST.hasVertexCache() && IS_VTX(get(Opcode)); 194} 195 196bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const { 197 const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 198 return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode()); 199} 200 201bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { 202 return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode)); 203} 204 205bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const { 206 const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>(); 207 return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) || 208 usesTextureCache(MI->getOpcode()); 209} 210 211bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { 212 switch (Opcode) { 213 case AMDGPU::KILLGT: 214 case AMDGPU::GROUP_BARRIER: 215 return true; 216 default: 217 return false; 218 } 219} 220 221bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const { 222 return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; 223} 224 225bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const { 226 return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; 227} 228 229bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const { 230 if (!isALUInstr(MI->getOpcode())) { 231 return false; 232 } 233 for (MachineInstr::const_mop_iterator I = MI->operands_begin(), 234 E = MI->operands_end(); I != E; ++I) { 235 if (!I->isReg() || !I->isUse() || 236 TargetRegisterInfo::isVirtualRegister(I->getReg())) 237 continue; 238 239 if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())) 240 return true; 241 } 242 return false; 243} 244 245int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const { 246 static const unsigned OpTable[] = { 247 AMDGPU::OpName::src0, 248 AMDGPU::OpName::src1, 249 AMDGPU::OpName::src2 250 }; 251 252 assert (SrcNum < 3); 253 return getOperandIdx(Opcode, OpTable[SrcNum]); 254} 255 256#define SRC_SEL_ROWS 11 257int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { 258 static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = { 259 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 260 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 261 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 262 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 263 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 264 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 265 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 266 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 267 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 268 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 269 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} 270 }; 271 272 for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) { 273 if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) { 274 return getOperandIdx(Opcode, SrcSelTable[i][1]); 275 } 276 } 277 return -1; 278} 279#undef SRC_SEL_ROWS 280 281SmallVector<std::pair<MachineOperand *, int64_t>, 3> 282R600InstrInfo::getSrcs(MachineInstr *MI) const { 283 SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; 284 285 if (MI->getOpcode() == AMDGPU::DOT_4) { 286 static const unsigned OpTable[8][2] = { 287 {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, 288 {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, 289 {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, 290 {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, 291 {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, 292 {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, 293 {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, 294 {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, 295 }; 296 297 for (unsigned j = 0; j < 8; j++) { 298 MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), 299 OpTable[j][0])); 300 unsigned Reg = MO.getReg(); 301 if (Reg == AMDGPU::ALU_CONST) { 302 unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(), 303 OpTable[j][1])).getImm(); 304 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 305 continue; 306 } 307 308 } 309 return Result; 310 } 311 312 static const unsigned OpTable[3][2] = { 313 {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, 314 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, 315 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, 316 }; 317 318 for (unsigned j = 0; j < 3; j++) { 319 int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); 320 if (SrcIdx < 0) 321 break; 322 MachineOperand &MO = MI->getOperand(SrcIdx); 323 unsigned Reg = MI->getOperand(SrcIdx).getReg(); 324 if (Reg == AMDGPU::ALU_CONST) { 325 unsigned Sel = MI->getOperand( 326 getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); 327 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel)); 328 continue; 329 } 330 if (Reg == AMDGPU::ALU_LITERAL_X) { 331 unsigned Imm = MI->getOperand( 332 getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm(); 333 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm)); 334 continue; 335 } 336 Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0)); 337 } 338 return Result; 339} 340 341std::vector<std::pair<int, unsigned> > 342R600InstrInfo::ExtractSrcs(MachineInstr *MI, 343 const DenseMap<unsigned, unsigned> &PV, 344 unsigned &ConstCount) const { 345 ConstCount = 0; 346 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI); 347 const std::pair<int, unsigned> DummyPair(-1, 0); 348 std::vector<std::pair<int, unsigned> > Result; 349 unsigned i = 0; 350 for (unsigned n = Srcs.size(); i < n; ++i) { 351 unsigned Reg = Srcs[i].first->getReg(); 352 unsigned Index = RI.getEncodingValue(Reg) & 0xff; 353 if (Reg == AMDGPU::OQAP) { 354 Result.push_back(std::pair<int, unsigned>(Index, 0)); 355 } 356 if (PV.find(Reg) != PV.end()) { 357 // 255 is used to tells its a PS/PV reg 358 Result.push_back(std::pair<int, unsigned>(255, 0)); 359 continue; 360 } 361 if (Index > 127) { 362 ConstCount++; 363 Result.push_back(DummyPair); 364 continue; 365 } 366 unsigned Chan = RI.getHWRegChan(Reg); 367 Result.push_back(std::pair<int, unsigned>(Index, Chan)); 368 } 369 for (; i < 3; ++i) 370 Result.push_back(DummyPair); 371 return Result; 372} 373 374static std::vector<std::pair<int, unsigned> > 375Swizzle(std::vector<std::pair<int, unsigned> > Src, 376 R600InstrInfo::BankSwizzle Swz) { 377 if (Src[0] == Src[1]) 378 Src[1].first = -1; 379 switch (Swz) { 380 case R600InstrInfo::ALU_VEC_012_SCL_210: 381 break; 382 case R600InstrInfo::ALU_VEC_021_SCL_122: 383 std::swap(Src[1], Src[2]); 384 break; 385 case R600InstrInfo::ALU_VEC_102_SCL_221: 386 std::swap(Src[0], Src[1]); 387 break; 388 case R600InstrInfo::ALU_VEC_120_SCL_212: 389 std::swap(Src[0], Src[1]); 390 std::swap(Src[0], Src[2]); 391 break; 392 case R600InstrInfo::ALU_VEC_201: 393 std::swap(Src[0], Src[2]); 394 std::swap(Src[0], Src[1]); 395 break; 396 case R600InstrInfo::ALU_VEC_210: 397 std::swap(Src[0], Src[2]); 398 break; 399 } 400 return Src; 401} 402 403static unsigned 404getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { 405 switch (Swz) { 406 case R600InstrInfo::ALU_VEC_012_SCL_210: { 407 unsigned Cycles[3] = { 2, 1, 0}; 408 return Cycles[Op]; 409 } 410 case R600InstrInfo::ALU_VEC_021_SCL_122: { 411 unsigned Cycles[3] = { 1, 2, 2}; 412 return Cycles[Op]; 413 } 414 case R600InstrInfo::ALU_VEC_120_SCL_212: { 415 unsigned Cycles[3] = { 2, 1, 2}; 416 return Cycles[Op]; 417 } 418 case R600InstrInfo::ALU_VEC_102_SCL_221: { 419 unsigned Cycles[3] = { 2, 2, 1}; 420 return Cycles[Op]; 421 } 422 default: 423 llvm_unreachable("Wrong Swizzle for Trans Slot"); 424 return 0; 425 } 426} 427 428/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed 429/// in the same Instruction Group while meeting read port limitations given a 430/// Swz swizzle sequence. 431unsigned R600InstrInfo::isLegalUpTo( 432 const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 433 const std::vector<R600InstrInfo::BankSwizzle> &Swz, 434 const std::vector<std::pair<int, unsigned> > &TransSrcs, 435 R600InstrInfo::BankSwizzle TransSwz) const { 436 int Vector[4][3]; 437 memset(Vector, -1, sizeof(Vector)); 438 for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) { 439 const std::vector<std::pair<int, unsigned> > &Srcs = 440 Swizzle(IGSrcs[i], Swz[i]); 441 for (unsigned j = 0; j < 3; j++) { 442 const std::pair<int, unsigned> &Src = Srcs[j]; 443 if (Src.first < 0 || Src.first == 255) 444 continue; 445 if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) { 446 if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && 447 Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) { 448 // The value from output queue A (denoted by register OQAP) can 449 // only be fetched during the first cycle. 450 return false; 451 } 452 // OQAP does not count towards the normal read port restrictions 453 continue; 454 } 455 if (Vector[Src.second][j] < 0) 456 Vector[Src.second][j] = Src.first; 457 if (Vector[Src.second][j] != Src.first) 458 return i; 459 } 460 } 461 // Now check Trans Alu 462 for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) { 463 const std::pair<int, unsigned> &Src = TransSrcs[i]; 464 unsigned Cycle = getTransSwizzle(TransSwz, i); 465 if (Src.first < 0) 466 continue; 467 if (Src.first == 255) 468 continue; 469 if (Vector[Src.second][Cycle] < 0) 470 Vector[Src.second][Cycle] = Src.first; 471 if (Vector[Src.second][Cycle] != Src.first) 472 return IGSrcs.size() - 1; 473 } 474 return IGSrcs.size(); 475} 476 477/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next 478/// (in lexicographic term) swizzle sequence assuming that all swizzles after 479/// Idx can be skipped 480static bool 481NextPossibleSolution( 482 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 483 unsigned Idx) { 484 assert(Idx < SwzCandidate.size()); 485 int ResetIdx = Idx; 486 while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210) 487 ResetIdx --; 488 for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) { 489 SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; 490 } 491 if (ResetIdx == -1) 492 return false; 493 int NextSwizzle = SwzCandidate[ResetIdx] + 1; 494 SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; 495 return true; 496} 497 498/// Enumerate all possible Swizzle sequence to find one that can meet all 499/// read port requirements. 500bool R600InstrInfo::FindSwizzleForVectorSlot( 501 const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs, 502 std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, 503 const std::vector<std::pair<int, unsigned> > &TransSrcs, 504 R600InstrInfo::BankSwizzle TransSwz) const { 505 unsigned ValidUpTo = 0; 506 do { 507 ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); 508 if (ValidUpTo == IGSrcs.size()) 509 return true; 510 } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); 511 return false; 512} 513 514/// Instructions in Trans slot can't read gpr at cycle 0 if they also read 515/// a const, and can't read a gpr at cycle 1 if they read 2 const. 516static bool 517isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, 518 const std::vector<std::pair<int, unsigned> > &TransOps, 519 unsigned ConstCount) { 520 // TransALU can't read 3 constants 521 if (ConstCount > 2) 522 return false; 523 for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { 524 const std::pair<int, unsigned> &Src = TransOps[i]; 525 unsigned Cycle = getTransSwizzle(TransSwz, i); 526 if (Src.first < 0) 527 continue; 528 if (ConstCount > 0 && Cycle == 0) 529 return false; 530 if (ConstCount > 1 && Cycle == 1) 531 return false; 532 } 533 return true; 534} 535 536bool 537R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, 538 const DenseMap<unsigned, unsigned> &PV, 539 std::vector<BankSwizzle> &ValidSwizzle, 540 bool isLastAluTrans) 541 const { 542 //Todo : support shared src0 - src1 operand 543 544 std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs; 545 ValidSwizzle.clear(); 546 unsigned ConstCount; 547 BankSwizzle TransBS = ALU_VEC_012_SCL_210; 548 for (unsigned i = 0, e = IG.size(); i < e; ++i) { 549 IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount)); 550 unsigned Op = getOperandIdx(IG[i]->getOpcode(), 551 AMDGPU::OpName::bank_swizzle); 552 ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) 553 IG[i]->getOperand(Op).getImm()); 554 } 555 std::vector<std::pair<int, unsigned> > TransOps; 556 if (!isLastAluTrans) 557 return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); 558 559 TransOps = IGSrcs.back(); 560 IGSrcs.pop_back(); 561 ValidSwizzle.pop_back(); 562 563 static const R600InstrInfo::BankSwizzle TransSwz[] = { 564 ALU_VEC_012_SCL_210, 565 ALU_VEC_021_SCL_122, 566 ALU_VEC_120_SCL_212, 567 ALU_VEC_102_SCL_221 568 }; 569 for (unsigned i = 0; i < 4; i++) { 570 TransBS = TransSwz[i]; 571 if (!isConstCompatible(TransBS, TransOps, ConstCount)) 572 continue; 573 bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, 574 TransBS); 575 if (Result) { 576 ValidSwizzle.push_back(TransBS); 577 return true; 578 } 579 } 580 581 return false; 582} 583 584 585bool 586R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) 587 const { 588 assert (Consts.size() <= 12 && "Too many operands in instructions group"); 589 unsigned Pair1 = 0, Pair2 = 0; 590 for (unsigned i = 0, n = Consts.size(); i < n; ++i) { 591 unsigned ReadConstHalf = Consts[i] & 2; 592 unsigned ReadConstIndex = Consts[i] & (~3); 593 unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; 594 if (!Pair1) { 595 Pair1 = ReadHalfConst; 596 continue; 597 } 598 if (Pair1 == ReadHalfConst) 599 continue; 600 if (!Pair2) { 601 Pair2 = ReadHalfConst; 602 continue; 603 } 604 if (Pair2 != ReadHalfConst) 605 return false; 606 } 607 return true; 608} 609 610bool 611R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) 612 const { 613 std::vector<unsigned> Consts; 614 SmallSet<int64_t, 4> Literals; 615 for (unsigned i = 0, n = MIs.size(); i < n; i++) { 616 MachineInstr *MI = MIs[i]; 617 if (!isALUInstr(MI->getOpcode())) 618 continue; 619 620 const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs = 621 getSrcs(MI); 622 623 for (unsigned j = 0, e = Srcs.size(); j < e; j++) { 624 std::pair<MachineOperand *, unsigned> Src = Srcs[j]; 625 if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) 626 Literals.insert(Src.second); 627 if (Literals.size() > 4) 628 return false; 629 if (Src.first->getReg() == AMDGPU::ALU_CONST) 630 Consts.push_back(Src.second); 631 if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || 632 AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) { 633 unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; 634 unsigned Chan = RI.getHWRegChan(Src.first->getReg()); 635 Consts.push_back((Index << 2) | Chan); 636 } 637 } 638 } 639 return fitsConstReadLimitations(Consts); 640} 641 642DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 643 const ScheduleDAG *DAG) const { 644 const InstrItineraryData *II = TM->getInstrItineraryData(); 645 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 646} 647 648static bool 649isPredicateSetter(unsigned Opcode) { 650 switch (Opcode) { 651 case AMDGPU::PRED_X: 652 return true; 653 default: 654 return false; 655 } 656} 657 658static MachineInstr * 659findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 660 MachineBasicBlock::iterator I) { 661 while (I != MBB.begin()) { 662 --I; 663 MachineInstr *MI = I; 664 if (isPredicateSetter(MI->getOpcode())) 665 return MI; 666 } 667 668 return NULL; 669} 670 671static 672bool isJump(unsigned Opcode) { 673 return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; 674} 675 676static bool isBranch(unsigned Opcode) { 677 return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || 678 Opcode == AMDGPU::BRANCH_COND_f32; 679} 680 681bool 682R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 683 MachineBasicBlock *&TBB, 684 MachineBasicBlock *&FBB, 685 SmallVectorImpl<MachineOperand> &Cond, 686 bool AllowModify) const { 687 // Most of the following comes from the ARM implementation of AnalyzeBranch 688 689 // If the block has no terminators, it just falls into the block after it. 690 MachineBasicBlock::iterator I = MBB.end(); 691 if (I == MBB.begin()) 692 return false; 693 --I; 694 while (I->isDebugValue()) { 695 if (I == MBB.begin()) 696 return false; 697 --I; 698 } 699 // AMDGPU::BRANCH* instructions are only available after isel and are not 700 // handled 701 if (isBranch(I->getOpcode())) 702 return true; 703 if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) { 704 return false; 705 } 706 707 // Get the last instruction in the block. 708 MachineInstr *LastInst = I; 709 710 // If there is only one terminator instruction, process it. 711 unsigned LastOpc = LastInst->getOpcode(); 712 if (I == MBB.begin() || 713 !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) { 714 if (LastOpc == AMDGPU::JUMP) { 715 TBB = LastInst->getOperand(0).getMBB(); 716 return false; 717 } else if (LastOpc == AMDGPU::JUMP_COND) { 718 MachineInstr *predSet = I; 719 while (!isPredicateSetter(predSet->getOpcode())) { 720 predSet = --I; 721 } 722 TBB = LastInst->getOperand(0).getMBB(); 723 Cond.push_back(predSet->getOperand(1)); 724 Cond.push_back(predSet->getOperand(2)); 725 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 726 return false; 727 } 728 return true; // Can't handle indirect branch. 729 } 730 731 // Get the instruction before it if it is a terminator. 732 MachineInstr *SecondLastInst = I; 733 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 734 735 // If the block ends with a B and a Bcc, handle it. 736 if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { 737 MachineInstr *predSet = --I; 738 while (!isPredicateSetter(predSet->getOpcode())) { 739 predSet = --I; 740 } 741 TBB = SecondLastInst->getOperand(0).getMBB(); 742 FBB = LastInst->getOperand(0).getMBB(); 743 Cond.push_back(predSet->getOperand(1)); 744 Cond.push_back(predSet->getOperand(2)); 745 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 746 return false; 747 } 748 749 // Otherwise, can't handle this. 750 return true; 751} 752 753int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 754 const MachineInstr *MI = op.getParent(); 755 756 switch (MI->getDesc().OpInfo->RegClass) { 757 default: // FIXME: fallthrough?? 758 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; 759 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; 760 }; 761} 762 763static 764MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { 765 for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); 766 It != E; ++It) { 767 if (It->getOpcode() == AMDGPU::CF_ALU || 768 It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 769 return llvm::prior(It.base()); 770 } 771 return MBB.end(); 772} 773 774unsigned 775R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, 776 MachineBasicBlock *TBB, 777 MachineBasicBlock *FBB, 778 const SmallVectorImpl<MachineOperand> &Cond, 779 DebugLoc DL) const { 780 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 781 782 if (FBB == 0) { 783 if (Cond.empty()) { 784 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); 785 return 1; 786 } else { 787 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 788 assert(PredSet && "No previous predicate !"); 789 addFlag(PredSet, 0, MO_FLAG_PUSH); 790 PredSet->getOperand(2).setImm(Cond[1].getImm()); 791 792 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 793 .addMBB(TBB) 794 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 795 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 796 if (CfAlu == MBB.end()) 797 return 1; 798 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 799 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 800 return 1; 801 } 802 } else { 803 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 804 assert(PredSet && "No previous predicate !"); 805 addFlag(PredSet, 0, MO_FLAG_PUSH); 806 PredSet->getOperand(2).setImm(Cond[1].getImm()); 807 BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) 808 .addMBB(TBB) 809 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 810 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); 811 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 812 if (CfAlu == MBB.end()) 813 return 2; 814 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); 815 CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); 816 return 2; 817 } 818} 819 820unsigned 821R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 822 823 // Note : we leave PRED* instructions there. 824 // They may be needed when predicating instructions. 825 826 MachineBasicBlock::iterator I = MBB.end(); 827 828 if (I == MBB.begin()) { 829 return 0; 830 } 831 --I; 832 switch (I->getOpcode()) { 833 default: 834 return 0; 835 case AMDGPU::JUMP_COND: { 836 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 837 clearFlag(predSet, 0, MO_FLAG_PUSH); 838 I->eraseFromParent(); 839 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 840 if (CfAlu == MBB.end()) 841 break; 842 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 843 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 844 break; 845 } 846 case AMDGPU::JUMP: 847 I->eraseFromParent(); 848 break; 849 } 850 I = MBB.end(); 851 852 if (I == MBB.begin()) { 853 return 1; 854 } 855 --I; 856 switch (I->getOpcode()) { 857 // FIXME: only one case?? 858 default: 859 return 1; 860 case AMDGPU::JUMP_COND: { 861 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 862 clearFlag(predSet, 0, MO_FLAG_PUSH); 863 I->eraseFromParent(); 864 MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); 865 if (CfAlu == MBB.end()) 866 break; 867 assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); 868 CfAlu->setDesc(get(AMDGPU::CF_ALU)); 869 break; 870 } 871 case AMDGPU::JUMP: 872 I->eraseFromParent(); 873 break; 874 } 875 return 2; 876} 877 878bool 879R600InstrInfo::isPredicated(const MachineInstr *MI) const { 880 int idx = MI->findFirstPredOperandIdx(); 881 if (idx < 0) 882 return false; 883 884 unsigned Reg = MI->getOperand(idx).getReg(); 885 switch (Reg) { 886 default: return false; 887 case AMDGPU::PRED_SEL_ONE: 888 case AMDGPU::PRED_SEL_ZERO: 889 case AMDGPU::PREDICATE_BIT: 890 return true; 891 } 892} 893 894bool 895R600InstrInfo::isPredicable(MachineInstr *MI) const { 896 // XXX: KILL* instructions can be predicated, but they must be the last 897 // instruction in a clause, so this means any instructions after them cannot 898 // be predicated. Until we have proper support for instruction clauses in the 899 // backend, we will mark KILL* instructions as unpredicable. 900 901 if (MI->getOpcode() == AMDGPU::KILLGT) { 902 return false; 903 } else if (MI->getOpcode() == AMDGPU::CF_ALU) { 904 // If the clause start in the middle of MBB then the MBB has more 905 // than a single clause, unable to predicate several clauses. 906 if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI)) 907 return false; 908 // TODO: We don't support KC merging atm 909 if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0) 910 return false; 911 return true; 912 } else if (isVector(*MI)) { 913 return false; 914 } else { 915 return AMDGPUInstrInfo::isPredicable(MI); 916 } 917} 918 919 920bool 921R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, 922 unsigned NumCyles, 923 unsigned ExtraPredCycles, 924 const BranchProbability &Probability) const{ 925 return true; 926} 927 928bool 929R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, 930 unsigned NumTCycles, 931 unsigned ExtraTCycles, 932 MachineBasicBlock &FMBB, 933 unsigned NumFCycles, 934 unsigned ExtraFCycles, 935 const BranchProbability &Probability) const { 936 return true; 937} 938 939bool 940R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, 941 unsigned NumCyles, 942 const BranchProbability &Probability) 943 const { 944 return true; 945} 946 947bool 948R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 949 MachineBasicBlock &FMBB) const { 950 return false; 951} 952 953 954bool 955R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 956 MachineOperand &MO = Cond[1]; 957 switch (MO.getImm()) { 958 case OPCODE_IS_ZERO_INT: 959 MO.setImm(OPCODE_IS_NOT_ZERO_INT); 960 break; 961 case OPCODE_IS_NOT_ZERO_INT: 962 MO.setImm(OPCODE_IS_ZERO_INT); 963 break; 964 case OPCODE_IS_ZERO: 965 MO.setImm(OPCODE_IS_NOT_ZERO); 966 break; 967 case OPCODE_IS_NOT_ZERO: 968 MO.setImm(OPCODE_IS_ZERO); 969 break; 970 default: 971 return true; 972 } 973 974 MachineOperand &MO2 = Cond[2]; 975 switch (MO2.getReg()) { 976 case AMDGPU::PRED_SEL_ZERO: 977 MO2.setReg(AMDGPU::PRED_SEL_ONE); 978 break; 979 case AMDGPU::PRED_SEL_ONE: 980 MO2.setReg(AMDGPU::PRED_SEL_ZERO); 981 break; 982 default: 983 return true; 984 } 985 return false; 986} 987 988bool 989R600InstrInfo::DefinesPredicate(MachineInstr *MI, 990 std::vector<MachineOperand> &Pred) const { 991 return isPredicateSetter(MI->getOpcode()); 992} 993 994 995bool 996R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 997 const SmallVectorImpl<MachineOperand> &Pred2) const { 998 return false; 999} 1000 1001 1002bool 1003R600InstrInfo::PredicateInstruction(MachineInstr *MI, 1004 const SmallVectorImpl<MachineOperand> &Pred) const { 1005 int PIdx = MI->findFirstPredOperandIdx(); 1006 1007 if (MI->getOpcode() == AMDGPU::CF_ALU) { 1008 MI->getOperand(8).setImm(0); 1009 return true; 1010 } 1011 1012 if (PIdx != -1) { 1013 MachineOperand &PMO = MI->getOperand(PIdx); 1014 PMO.setReg(Pred[2].getReg()); 1015 MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); 1016 MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 1017 return true; 1018 } 1019 1020 return false; 1021} 1022 1023unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const { 1024 return 2; 1025} 1026 1027unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 1028 const MachineInstr *MI, 1029 unsigned *PredCost) const { 1030 if (PredCost) 1031 *PredCost = 2; 1032 return 2; 1033} 1034 1035void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, 1036 const MachineFunction &MF) const { 1037 const AMDGPUFrameLowering *TFL = 1038 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); 1039 1040 unsigned StackWidth = TFL->getStackWidth(MF); 1041 int End = getIndirectIndexEnd(MF); 1042 1043 if (End == -1) 1044 return; 1045 1046 for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) { 1047 unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); 1048 Reserved.set(SuperReg); 1049 for (unsigned Chan = 0; Chan < StackWidth; ++Chan) { 1050 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); 1051 Reserved.set(Reg); 1052 } 1053 } 1054} 1055 1056unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, 1057 unsigned Channel) const { 1058 // XXX: Remove when we support a stack width > 2 1059 assert(Channel == 0); 1060 return RegIndex; 1061} 1062 1063const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { 1064 return &AMDGPU::R600_TReg32_XRegClass; 1065} 1066 1067MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, 1068 MachineBasicBlock::iterator I, 1069 unsigned ValueReg, unsigned Address, 1070 unsigned OffsetReg) const { 1071 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); 1072 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, 1073 AMDGPU::AR_X, OffsetReg); 1074 setImmOperand(MOVA, AMDGPU::OpName::write, 0); 1075 1076 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, 1077 AddrReg, ValueReg) 1078 .addReg(AMDGPU::AR_X, 1079 RegState::Implicit | RegState::Kill); 1080 setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1); 1081 return Mov; 1082} 1083 1084MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, 1085 MachineBasicBlock::iterator I, 1086 unsigned ValueReg, unsigned Address, 1087 unsigned OffsetReg) const { 1088 unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); 1089 MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, 1090 AMDGPU::AR_X, 1091 OffsetReg); 1092 setImmOperand(MOVA, AMDGPU::OpName::write, 0); 1093 MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, 1094 ValueReg, 1095 AddrReg) 1096 .addReg(AMDGPU::AR_X, 1097 RegState::Implicit | RegState::Kill); 1098 setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1); 1099 1100 return Mov; 1101} 1102 1103unsigned R600InstrInfo::getMaxAlusPerClause() const { 1104 return 115; 1105} 1106 1107MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, 1108 MachineBasicBlock::iterator I, 1109 unsigned Opcode, 1110 unsigned DstReg, 1111 unsigned Src0Reg, 1112 unsigned Src1Reg) const { 1113 MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode), 1114 DstReg); // $dst 1115 1116 if (Src1Reg) { 1117 MIB.addImm(0) // $update_exec_mask 1118 .addImm(0); // $update_predicate 1119 } 1120 MIB.addImm(1) // $write 1121 .addImm(0) // $omod 1122 .addImm(0) // $dst_rel 1123 .addImm(0) // $dst_clamp 1124 .addReg(Src0Reg) // $src0 1125 .addImm(0) // $src0_neg 1126 .addImm(0) // $src0_rel 1127 .addImm(0) // $src0_abs 1128 .addImm(-1); // $src0_sel 1129 1130 if (Src1Reg) { 1131 MIB.addReg(Src1Reg) // $src1 1132 .addImm(0) // $src1_neg 1133 .addImm(0) // $src1_rel 1134 .addImm(0) // $src1_abs 1135 .addImm(-1); // $src1_sel 1136 } 1137 1138 //XXX: The r600g finalizer expects this to be 1, once we've moved the 1139 //scheduling to the backend, we can change the default to 0. 1140 MIB.addImm(1) // $last 1141 .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel 1142 .addImm(0) // $literal 1143 .addImm(0); // $bank_swizzle 1144 1145 return MIB; 1146} 1147 1148#define OPERAND_CASE(Label) \ 1149 case Label: { \ 1150 static const unsigned Ops[] = \ 1151 { \ 1152 Label##_X, \ 1153 Label##_Y, \ 1154 Label##_Z, \ 1155 Label##_W \ 1156 }; \ 1157 return Ops[Slot]; \ 1158 } 1159 1160static unsigned getSlotedOps(unsigned Op, unsigned Slot) { 1161 switch (Op) { 1162 OPERAND_CASE(AMDGPU::OpName::update_exec_mask) 1163 OPERAND_CASE(AMDGPU::OpName::update_pred) 1164 OPERAND_CASE(AMDGPU::OpName::write) 1165 OPERAND_CASE(AMDGPU::OpName::omod) 1166 OPERAND_CASE(AMDGPU::OpName::dst_rel) 1167 OPERAND_CASE(AMDGPU::OpName::clamp) 1168 OPERAND_CASE(AMDGPU::OpName::src0) 1169 OPERAND_CASE(AMDGPU::OpName::src0_neg) 1170 OPERAND_CASE(AMDGPU::OpName::src0_rel) 1171 OPERAND_CASE(AMDGPU::OpName::src0_abs) 1172 OPERAND_CASE(AMDGPU::OpName::src0_sel) 1173 OPERAND_CASE(AMDGPU::OpName::src1) 1174 OPERAND_CASE(AMDGPU::OpName::src1_neg) 1175 OPERAND_CASE(AMDGPU::OpName::src1_rel) 1176 OPERAND_CASE(AMDGPU::OpName::src1_abs) 1177 OPERAND_CASE(AMDGPU::OpName::src1_sel) 1178 OPERAND_CASE(AMDGPU::OpName::pred_sel) 1179 default: 1180 llvm_unreachable("Wrong Operand"); 1181 } 1182} 1183 1184#undef OPERAND_CASE 1185 1186MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( 1187 MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) 1188 const { 1189 assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); 1190 unsigned Opcode; 1191 const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); 1192 if (ST.getGeneration() <= AMDGPUSubtarget::R700) 1193 Opcode = AMDGPU::DOT4_r600; 1194 else 1195 Opcode = AMDGPU::DOT4_eg; 1196 MachineBasicBlock::iterator I = MI; 1197 MachineOperand &Src0 = MI->getOperand( 1198 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); 1199 MachineOperand &Src1 = MI->getOperand( 1200 getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); 1201 MachineInstr *MIB = buildDefaultInstruction( 1202 MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); 1203 static const unsigned Operands[14] = { 1204 AMDGPU::OpName::update_exec_mask, 1205 AMDGPU::OpName::update_pred, 1206 AMDGPU::OpName::write, 1207 AMDGPU::OpName::omod, 1208 AMDGPU::OpName::dst_rel, 1209 AMDGPU::OpName::clamp, 1210 AMDGPU::OpName::src0_neg, 1211 AMDGPU::OpName::src0_rel, 1212 AMDGPU::OpName::src0_abs, 1213 AMDGPU::OpName::src0_sel, 1214 AMDGPU::OpName::src1_neg, 1215 AMDGPU::OpName::src1_rel, 1216 AMDGPU::OpName::src1_abs, 1217 AMDGPU::OpName::src1_sel, 1218 }; 1219 1220 for (unsigned i = 0; i < 14; i++) { 1221 MachineOperand &MO = MI->getOperand( 1222 getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); 1223 assert (MO.isImm()); 1224 setImmOperand(MIB, Operands[i], MO.getImm()); 1225 } 1226 MIB->getOperand(20).setImm(0); 1227 return MIB; 1228} 1229 1230MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, 1231 MachineBasicBlock::iterator I, 1232 unsigned DstReg, 1233 uint64_t Imm) const { 1234 MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, 1235 AMDGPU::ALU_LITERAL_X); 1236 setImmOperand(MovImm, AMDGPU::OpName::literal, Imm); 1237 return MovImm; 1238} 1239 1240MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, 1241 MachineBasicBlock::iterator I, 1242 unsigned DstReg, unsigned SrcReg) const { 1243 return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); 1244} 1245 1246int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { 1247 return getOperandIdx(MI.getOpcode(), Op); 1248} 1249 1250int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { 1251 return AMDGPU::getNamedOperandIdx(Opcode, Op); 1252} 1253 1254void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op, 1255 int64_t Imm) const { 1256 int Idx = getOperandIdx(*MI, Op); 1257 assert(Idx != -1 && "Operand not supported for this instruction."); 1258 assert(MI->getOperand(Idx).isImm()); 1259 MI->getOperand(Idx).setImm(Imm); 1260} 1261 1262//===----------------------------------------------------------------------===// 1263// Instruction flag getters/setters 1264//===----------------------------------------------------------------------===// 1265 1266bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const { 1267 return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; 1268} 1269 1270MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx, 1271 unsigned Flag) const { 1272 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1273 int FlagIndex = 0; 1274 if (Flag != 0) { 1275 // If we pass something other than the default value of Flag to this 1276 // function, it means we are want to set a flag on an instruction 1277 // that uses native encoding. 1278 assert(HAS_NATIVE_OPERANDS(TargetFlags)); 1279 bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; 1280 switch (Flag) { 1281 case MO_FLAG_CLAMP: 1282 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp); 1283 break; 1284 case MO_FLAG_MASK: 1285 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write); 1286 break; 1287 case MO_FLAG_NOT_LAST: 1288 case MO_FLAG_LAST: 1289 FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last); 1290 break; 1291 case MO_FLAG_NEG: 1292 switch (SrcIdx) { 1293 case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break; 1294 case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break; 1295 case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break; 1296 } 1297 break; 1298 1299 case MO_FLAG_ABS: 1300 assert(!IsOP3 && "Cannot set absolute value modifier for OP3 " 1301 "instructions."); 1302 (void)IsOP3; 1303 switch (SrcIdx) { 1304 case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break; 1305 case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break; 1306 } 1307 break; 1308 1309 default: 1310 FlagIndex = -1; 1311 break; 1312 } 1313 assert(FlagIndex != -1 && "Flag not supported for this instruction"); 1314 } else { 1315 FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags); 1316 assert(FlagIndex != 0 && 1317 "Instruction flags not supported for this instruction"); 1318 } 1319 1320 MachineOperand &FlagOp = MI->getOperand(FlagIndex); 1321 assert(FlagOp.isImm()); 1322 return FlagOp; 1323} 1324 1325void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand, 1326 unsigned Flag) const { 1327 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1328 if (Flag == 0) { 1329 return; 1330 } 1331 if (HAS_NATIVE_OPERANDS(TargetFlags)) { 1332 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); 1333 if (Flag == MO_FLAG_NOT_LAST) { 1334 clearFlag(MI, Operand, MO_FLAG_LAST); 1335 } else if (Flag == MO_FLAG_MASK) { 1336 clearFlag(MI, Operand, Flag); 1337 } else { 1338 FlagOp.setImm(1); 1339 } 1340 } else { 1341 MachineOperand &FlagOp = getFlagOp(MI, Operand); 1342 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); 1343 } 1344} 1345 1346void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand, 1347 unsigned Flag) const { 1348 unsigned TargetFlags = get(MI->getOpcode()).TSFlags; 1349 if (HAS_NATIVE_OPERANDS(TargetFlags)) { 1350 MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); 1351 FlagOp.setImm(0); 1352 } else { 1353 MachineOperand &FlagOp = getFlagOp(MI); 1354 unsigned InstFlags = FlagOp.getImm(); 1355 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); 1356 FlagOp.setImm(InstFlags); 1357 } 1358} 1359