R600InstrInfo.cpp revision b21ab43cfc3fa0dacf5c95f04e58b6d804b59a16
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm),
    ST(tm.getSubtarget<AMDGPUSubtarget>())
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  unsigned VectorComponents = 0;
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    VectorComponents = 4;
  } else if (AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
             AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                                    .setIsKill(KillSrc);
  }
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
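// RETURN (the only case below) is one such placeholder: the hardware
// presumably performs the end-of-program return on its own, so the opcode
// only keeps the control flow well formed until lowering drops it.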
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
  if (isALUInstr(MI->getOpcode()))
    return true;
  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
    return true;
  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
  return isVectorOnly(MI->getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}
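
// KILLGT updates the pixel kill mask and GROUP_BARRIER synchronizes a whole
// work-group; presumably because of these clause-wide side effects, nothing
// may be scheduled after them inside the same clause.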

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
  if (!isALUInstr(MI->getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
                                        E = MI->operands_end(); I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting
/// read port limitations given a Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return false;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic order) swizzle sequence, assuming that all swizzles
/// after Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx--;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also
/// read a const, and can't read a GPR at cycle 1 if they read two consts.
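/// For example, with two constant operands the remaining GPR operand can only
/// be read at cycle 2, so only a trans swizzle that assigns cycle 2 to that
/// operand passes the check below.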
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // TransALU can't read 3 constants
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0/src1 operands

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
                               IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}


bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
         Opcode == AMDGPU::BRANCH_COND_f32;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
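  // On R600 that pattern is a JUMP_COND to TBB followed by an unconditional
  // JUMP to FBB, e.g.:
  //   JUMP_COND %bb.true    ; second to last
  //   JUMP %bb.false        ; last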
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return llvm::prior(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
          .addMBB(TBB)
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
        .addMBB(TBB)
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.
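  // A block holds at most two terminators we know how to erase: an optional
  // JUMP_COND followed by an optional JUMP. Each of the two passes below
  // strips one of them, and the value returned is the number of branches
  // actually removed.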

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
  // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB, then the MBB contains
    // more than a single clause; we are unable to predicate several clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
    const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


bool
R600InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}


bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2)
    const {
  return false;
}


bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred)
    const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
                                .addReg(AMDGPU::AR_X,
                                        RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)         // $last
     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
     .addImm(0)         // $literal
     .addImm(0);        // $bank_swizzle

  return MIB;
}

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg,
                                           unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
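    // Native-encoding instructions expose each flag (clamp, write, last,
    // per-source neg/abs) as its own immediate operand, so the requested flag
    // is located by operand index below; non-native instructions instead pack
    // all flags into the single operand handled in the else branch.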
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}
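
// Usage sketch (illustrative only): given a native-encoding ALU MachineInstr
// MI and TII pointing at this R600InstrInfo, the flag helpers compose as:
//   TII->addFlag(MI, 0, MO_FLAG_CLAMP);  // sets the $clamp operand to 1
//   TII->addFlag(MI, 0, MO_FLAG_MASK);   // masks the result (zeroes $write)
//   TII->addFlag(MI, 1, MO_FLAG_NEG);    // sets $src1_neg to 1
//   TII->clearFlag(MI, 1, MO_FLAG_NEG);  // resets $src1_neg to 0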