R600InstrInfo.cpp revision 7e9381951eb4dadf9c59257786416ac51a6a6c09
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm),
    ST(tm.getSubtarget<AMDGPUSubtarget>())
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    for (unsigned I = 0; I < 4; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {

    // We can't copy vec4 registers
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                      .setIsKill(KillSrc);
  }
}

MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                             unsigned DstReg, int64_t Imm) const {
  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder MIB(*MF, MI);
  MIB.addReg(DstReg, RegState::Define);
  MIB.addReg(AMDGPU::ALU_LITERAL_X);
  MIB.addImm(Imm);
  MIB.addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {

  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  }
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch(Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }

    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV)
    const {
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    unsigned Chan = RI.getHWRegChan(Reg);
    if (Index > 127) {
      // Not a plain GPR (constants, literals and other special registers),
      // so it does not go through the register file read ports.
      Result.push_back(DummyPair);
      continue;
    }
    if (PV.find(Reg) != PV.end()) {
      // Read through the PV/PS registers (result of the previous group),
      // so it does not consume a read port either.
      Result.push_back(DummyPair);
      continue;
    }
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  // Pad to exactly three (index, channel) entries per instruction.
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

// Reorder the three sources of a slot according to the given bank swizzle.
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012:
    break;
  case R600InstrInfo::ALU_VEC_021:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

// Check that the first CheckedSize instructions of the group, swizzled as
// requested, are compatible: each read port (indexed here by channel and
// operand slot) can only deliver a single register index per cycle.
static bool
isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
        const std::vector<R600InstrInfo::BankSwizzle> &Swz,
        unsigned CheckedSize) {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0; i < CheckedSize; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0)
        continue;
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return false;
    }
  }
  return true;
}

// Depth-first search over the six possible bank swizzles for each instruction
// of the group, backtracking as soon as the partial assignment violates the
// read port limitation.
static bool recursiveFitsFPLimitation(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Depth = 0) {
  if (!isLegal(IGSrcs, SwzCandidate, Depth))
    return false;
  if (IGSrcs.size() == Depth)
    return true;
  unsigned i = SwzCandidate[Depth];
  for (; i < 6; i++) {
    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
      return true;
  }
  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
  return false;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle)
    const {
  // TODO: support shared src0 - src1 operand

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
                            IG[i]->getOperand(Op).getImm());
  }
  bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
  if (!Result)
    return false;
  return true;
}


// A group may access at most two distinct constant cache line halves.
// Each entry of Consts encodes a constant address whose two low bits select
// the channel, so Consts[i] & ~3 is the line index and bit 1 of the channel
// selects the XY/ZW half; Pair1 and Pair2 track the two allowed values.
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
  // Collect every constant read of the group and check that they fit the
  // constant cache limitations above.
  std::vector<unsigned> Consts;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
           .addMBB(TBB)
           .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in
  // the backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}


bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}


bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}


bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = 0;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    Offset = std::max(Offset,
                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);

  return getIndirectIndexBegin(MF) + Offset;
}

std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
                 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
  std::vector<unsigned> Regs;

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1) {
    return Regs;
  }

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Regs.push_back(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Regs.push_back(Reg);
    }
  }
  return Regs;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
                                                     unsigned SourceReg) const {
  return &AMDGPU::R600_TReg32RegClass;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
  return &AMDGPU::TRegMemRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
  return &AMDGPU::IndirectRegRegClass;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  //XXX: The r600g finalizer expects this to be 1, once we've moved the
  //scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                     // $last
     .addReg(AMDGPU::PRED_SEL_OFF)  // $pred_sel
     .addImm(0)                     // $literal
     .addImm(0);                    // $bank_swizzle

  return MIB;
}

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}