// R600InstrInfo.cpp — snapshot at revision 1cb07bd3b8abd5e52e9dbd80bb1666058545387e
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// R600 Implementation of TargetInstrInfo. 11// 12//===----------------------------------------------------------------------===// 13 14#include "R600InstrInfo.h" 15#include "AMDGPUTargetMachine.h" 16#include "AMDGPUSubtarget.h" 17#include "R600RegisterInfo.h" 18#include "llvm/CodeGen/MachineInstrBuilder.h" 19#include "AMDILUtilityFunctions.h" 20 21#define GET_INSTRINFO_CTOR 22#include "AMDGPUGenDFAPacketizer.inc" 23 24using namespace llvm; 25 26R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 27 : AMDGPUInstrInfo(tm), 28 RI(tm, *this), 29 TM(tm) 30 { } 31 32const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const 33{ 34 return RI; 35} 36 37bool R600InstrInfo::isTrig(const MachineInstr &MI) const 38{ 39 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 40} 41 42bool R600InstrInfo::isVector(const MachineInstr &MI) const 43{ 44 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 45} 46 47void 48R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 49 MachineBasicBlock::iterator MI, DebugLoc DL, 50 unsigned DestReg, unsigned SrcReg, 51 bool KillSrc) const 52{ 53 if (AMDGPU::R600_Reg128RegClass.contains(DestReg) 54 && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 55 for (unsigned i = 0; i < 4; i++) { 56 unsigned SubRegIndex = RI.getSubRegFromChannel(i); 57 BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) 58 .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define) 59 .addReg(RI.getSubReg(SrcReg, SubRegIndex)) 60 .addReg(0) // PREDICATE_BIT 61 .addReg(DestReg, RegState::Define | RegState::Implicit); 62 } 63 } else { 64 65 /* We can't copy vec4 registers */ 66 assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) 67 && 
!AMDGPU::R600_Reg128RegClass.contains(SrcReg)); 68 69 BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) 70 .addReg(SrcReg, getKillRegState(KillSrc)) 71 .addReg(0); // PREDICATE_BIT 72 } 73} 74 75MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, 76 unsigned DstReg, int64_t Imm) const 77{ 78 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); 79 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); 80 MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); 81 MachineInstrBuilder(MI).addImm(Imm); 82 MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT 83 84 return MI; 85} 86 87unsigned R600InstrInfo::getIEQOpcode() const 88{ 89 return AMDGPU::SETE_INT; 90} 91 92bool R600InstrInfo::isMov(unsigned Opcode) const 93{ 94 95 96 switch(Opcode) { 97 default: return false; 98 case AMDGPU::MOV: 99 case AMDGPU::MOV_IMM_F32: 100 case AMDGPU::MOV_IMM_I32: 101 return true; 102 } 103} 104 105// Some instructions act as place holders to emulate operations that the GPU 106// hardware does automatically. This function can be used to check if 107// an opcode falls into this category. 
108bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const 109{ 110 switch (opcode) { 111 default: return false; 112 case AMDGPU::RETURN: 113 case AMDGPU::LAST: 114 case AMDGPU::MASK_WRITE: 115 case AMDGPU::RESERVE_REG: 116 return true; 117 } 118} 119 120bool R600InstrInfo::isTexOp(unsigned opcode) const 121{ 122 switch(opcode) { 123 default: return false; 124 case AMDGPU::TEX_LD: 125 case AMDGPU::TEX_GET_TEXTURE_RESINFO: 126 case AMDGPU::TEX_SAMPLE: 127 case AMDGPU::TEX_SAMPLE_C: 128 case AMDGPU::TEX_SAMPLE_L: 129 case AMDGPU::TEX_SAMPLE_C_L: 130 case AMDGPU::TEX_SAMPLE_LB: 131 case AMDGPU::TEX_SAMPLE_C_LB: 132 case AMDGPU::TEX_SAMPLE_G: 133 case AMDGPU::TEX_SAMPLE_C_G: 134 case AMDGPU::TEX_GET_GRADIENTS_H: 135 case AMDGPU::TEX_GET_GRADIENTS_V: 136 case AMDGPU::TEX_SET_GRADIENTS_H: 137 case AMDGPU::TEX_SET_GRADIENTS_V: 138 return true; 139 } 140} 141 142bool R600InstrInfo::isReductionOp(unsigned opcode) const 143{ 144 switch(opcode) { 145 default: return false; 146 case AMDGPU::DOT4_r600: 147 case AMDGPU::DOT4_eg: 148 return true; 149 } 150} 151 152bool R600InstrInfo::isCubeOp(unsigned opcode) const 153{ 154 switch(opcode) { 155 default: return false; 156 case AMDGPU::CUBE_r600_pseudo: 157 case AMDGPU::CUBE_r600_real: 158 case AMDGPU::CUBE_eg_pseudo: 159 case AMDGPU::CUBE_eg_real: 160 return true; 161 } 162} 163 164 165bool R600InstrInfo::isFCOp(unsigned opcode) const 166{ 167 switch(opcode) { 168 default: return false; 169 case AMDGPU::BREAK_LOGICALZ_f32: 170 case AMDGPU::BREAK_LOGICALNZ_i32: 171 case AMDGPU::BREAK_LOGICALZ_i32: 172 case AMDGPU::BREAK_LOGICALNZ_f32: 173 case AMDGPU::CONTINUE_LOGICALNZ_f32: 174 case AMDGPU::IF_LOGICALNZ_i32: 175 case AMDGPU::IF_LOGICALZ_f32: 176 case AMDGPU::ELSE: 177 case AMDGPU::ENDIF: 178 case AMDGPU::ENDLOOP: 179 case AMDGPU::IF_LOGICALNZ_f32: 180 case AMDGPU::WHILELOOP: 181 return true; 182 } 183} 184 185DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 186 const ScheduleDAG *DAG) const 
187{ 188 const InstrItineraryData *II = TM->getInstrItineraryData(); 189 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 190} 191 192static bool 193isPredicateSetter(unsigned opcode) 194{ 195 switch (opcode) { 196 case AMDGPU::PRED_X: 197 return true; 198 default: 199 return false; 200 } 201} 202 203static MachineInstr * 204findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 205 MachineBasicBlock::iterator I) 206{ 207 while (I != MBB.begin()) { 208 --I; 209 MachineInstr *MI = I; 210 if (isPredicateSetter(MI->getOpcode())) 211 return MI; 212 } 213 214 return NULL; 215} 216 217bool 218R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 219 MachineBasicBlock *&TBB, 220 MachineBasicBlock *&FBB, 221 SmallVectorImpl<MachineOperand> &Cond, 222 bool AllowModify) const 223{ 224 // Most of the following comes from the ARM implementation of AnalyzeBranch 225 226 // If the block has no terminators, it just falls into the block after it. 227 MachineBasicBlock::iterator I = MBB.end(); 228 if (I == MBB.begin()) 229 return false; 230 --I; 231 while (I->isDebugValue()) { 232 if (I == MBB.begin()) 233 return false; 234 --I; 235 } 236 if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { 237 return false; 238 } 239 240 // Get the last instruction in the block. 241 MachineInstr *LastInst = I; 242 243 // If there is only one terminator instruction, process it. 
244 unsigned LastOpc = LastInst->getOpcode(); 245 if (I == MBB.begin() || 246 static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { 247 if (LastOpc == AMDGPU::JUMP) { 248 if(!isPredicated(LastInst)) { 249 TBB = LastInst->getOperand(0).getMBB(); 250 return false; 251 } else { 252 MachineInstr *predSet = I; 253 while (!isPredicateSetter(predSet->getOpcode())) { 254 predSet = --I; 255 } 256 TBB = LastInst->getOperand(0).getMBB(); 257 Cond.push_back(predSet->getOperand(1)); 258 Cond.push_back(predSet->getOperand(2)); 259 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 260 return false; 261 } 262 } 263 return true; // Can't handle indirect branch. 264 } 265 266 // Get the instruction before it if it is a terminator. 267 MachineInstr *SecondLastInst = I; 268 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 269 270 // If the block ends with a B and a Bcc, handle it. 271 if (SecondLastOpc == AMDGPU::JUMP && 272 isPredicated(SecondLastInst) && 273 LastOpc == AMDGPU::JUMP && 274 !isPredicated(LastInst)) { 275 MachineInstr *predSet = --I; 276 while (!isPredicateSetter(predSet->getOpcode())) { 277 predSet = --I; 278 } 279 TBB = SecondLastInst->getOperand(0).getMBB(); 280 FBB = LastInst->getOperand(0).getMBB(); 281 Cond.push_back(predSet->getOperand(1)); 282 Cond.push_back(predSet->getOperand(2)); 283 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 284 return false; 285 } 286 287 // Otherwise, can't handle this. 288 return true; 289} 290 291int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 292 const MachineInstr *MI = op.getParent(); 293 294 switch (MI->getDesc().OpInfo->RegClass) { 295 default: // FIXME: fallthrough?? 
296 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; 297 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; 298 }; 299} 300 301unsigned 302R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, 303 MachineBasicBlock *TBB, 304 MachineBasicBlock *FBB, 305 const SmallVectorImpl<MachineOperand> &Cond, 306 DebugLoc DL) const 307{ 308 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 309 310 if (FBB == 0) { 311 if (Cond.empty()) { 312 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); 313 return 1; 314 } else { 315 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 316 assert(PredSet && "No previous predicate !"); 317 PredSet->getOperand(1).addTargetFlag(1<<4); 318 PredSet->getOperand(2).setImm(Cond[1].getImm()); 319 320 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 321 .addMBB(TBB) 322 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 323 return 1; 324 } 325 } else { 326 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 327 assert(PredSet && "No previous predicate !"); 328 PredSet->getOperand(1).addTargetFlag(1<<4); 329 PredSet->getOperand(2).setImm(Cond[1].getImm()); 330 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 331 .addMBB(TBB) 332 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 333 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); 334 return 2; 335 } 336} 337 338unsigned 339R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const 340{ 341 342 // Note : we leave PRED* instructions there. 343 // They may be needed when predicating instructions. 
344 345 MachineBasicBlock::iterator I = MBB.end(); 346 347 if (I == MBB.begin()) { 348 return 0; 349 } 350 --I; 351 switch (I->getOpcode()) { 352 default: 353 return 0; 354 case AMDGPU::JUMP: 355 if (isPredicated(I)) { 356 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 357 char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); 358 predSet->getOperand(1).setTargetFlags(flag); 359 } 360 I->eraseFromParent(); 361 break; 362 } 363 I = MBB.end(); 364 365 if (I == MBB.begin()) { 366 return 1; 367 } 368 --I; 369 switch (I->getOpcode()) { 370 // FIXME: only one case?? 371 default: 372 return 1; 373 case AMDGPU::JUMP: 374 if (isPredicated(I)) { 375 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 376 char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); 377 predSet->getOperand(1).setTargetFlags(flag); 378 } 379 I->eraseFromParent(); 380 break; 381 } 382 return 2; 383} 384 385bool 386R600InstrInfo::isPredicated(const MachineInstr *MI) const 387{ 388 int idx = MI->findFirstPredOperandIdx(); 389 if (idx < 0) 390 return false; 391 392 unsigned Reg = MI->getOperand(idx).getReg(); 393 switch (Reg) { 394 default: return false; 395 case AMDGPU::PRED_SEL_ONE: 396 case AMDGPU::PRED_SEL_ZERO: 397 case AMDGPU::PREDICATE_BIT: 398 return true; 399 } 400} 401 402bool 403R600InstrInfo::isPredicable(MachineInstr *MI) const 404{ 405 return AMDGPUInstrInfo::isPredicable(MI); 406} 407 408 409bool 410R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, 411 unsigned NumCyles, 412 unsigned ExtraPredCycles, 413 const BranchProbability &Probability) const{ 414 return true; 415} 416 417bool 418R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, 419 unsigned NumTCycles, 420 unsigned ExtraTCycles, 421 MachineBasicBlock &FMBB, 422 unsigned NumFCycles, 423 unsigned ExtraFCycles, 424 const BranchProbability &Probability) const 425{ 426 return true; 427} 428 429bool 430R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, 431 
unsigned NumCyles, 432 const BranchProbability &Probability) 433 const 434{ 435 return true; 436} 437 438bool 439R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 440 MachineBasicBlock &FMBB) const 441{ 442 return false; 443} 444 445 446bool 447R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const 448{ 449 MachineOperand &MO = Cond[1]; 450 switch (MO.getImm()) { 451 case OPCODE_IS_ZERO_INT: 452 MO.setImm(OPCODE_IS_NOT_ZERO_INT); 453 break; 454 case OPCODE_IS_NOT_ZERO_INT: 455 MO.setImm(OPCODE_IS_ZERO_INT); 456 break; 457 case OPCODE_IS_ZERO: 458 MO.setImm(OPCODE_IS_NOT_ZERO); 459 break; 460 case OPCODE_IS_NOT_ZERO: 461 MO.setImm(OPCODE_IS_ZERO); 462 break; 463 default: 464 return true; 465 } 466 467 MachineOperand &MO2 = Cond[2]; 468 switch (MO2.getReg()) { 469 case AMDGPU::PRED_SEL_ZERO: 470 MO2.setReg(AMDGPU::PRED_SEL_ONE); 471 break; 472 case AMDGPU::PRED_SEL_ONE: 473 MO2.setReg(AMDGPU::PRED_SEL_ZERO); 474 break; 475 default: 476 return true; 477 } 478 return false; 479} 480 481bool 482R600InstrInfo::DefinesPredicate(MachineInstr *MI, 483 std::vector<MachineOperand> &Pred) const 484{ 485 return isPredicateSetter(MI->getOpcode()); 486} 487 488 489bool 490R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 491 const SmallVectorImpl<MachineOperand> &Pred2) const 492{ 493 return false; 494} 495 496 497bool 498R600InstrInfo::PredicateInstruction(MachineInstr *MI, 499 const SmallVectorImpl<MachineOperand> &Pred) const 500{ 501 int PIdx = MI->findFirstPredOperandIdx(); 502 503 if (PIdx != -1) { 504 MachineOperand &PMO = MI->getOperand(PIdx); 505 PMO.setReg(Pred[2].getReg()); 506 MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 507 return true; 508 } 509 510 return false; 511} 512 513int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 514 const MachineInstr *MI, 515 unsigned *PredCost) const 516{ 517 if (PredCost) 518 *PredCost = 2; 519 return 2; 520} 521