// R600InstrInfo.cpp revision 05882985757e655f5298af483c881008d45e6249
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// R600 Implementation of TargetInstrInfo. 11// 12//===----------------------------------------------------------------------===// 13 14#include "R600InstrInfo.h" 15#include "AMDGPUTargetMachine.h" 16#include "AMDGPUSubtarget.h" 17#include "R600RegisterInfo.h" 18#include "llvm/CodeGen/MachineInstrBuilder.h" 19#include "AMDILUtilityFunctions.h" 20 21#define GET_INSTRINFO_CTOR 22#include "AMDGPUGenDFAPacketizer.inc" 23 24using namespace llvm; 25 26R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 27 : AMDGPUInstrInfo(tm), 28 RI(tm, *this), 29 TM(tm) 30 { } 31 32const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const 33{ 34 return RI; 35} 36 37bool R600InstrInfo::isTrig(const MachineInstr &MI) const 38{ 39 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 40} 41 42bool R600InstrInfo::isVector(const MachineInstr &MI) const 43{ 44 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 45} 46 47void 48R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 49 MachineBasicBlock::iterator MI, DebugLoc DL, 50 unsigned DestReg, unsigned SrcReg, 51 bool KillSrc) const 52{ 53 if (AMDGPU::R600_Reg128RegClass.contains(DestReg) 54 && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 55 for (unsigned i = 0; i < 4; i++) { 56 unsigned SubRegIndex = RI.getSubRegFromChannel(i); 57 BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) 58 .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define) 59 .addReg(RI.getSubReg(SrcReg, SubRegIndex)) 60 .addReg(0) // PREDICATE_BIT 61 .addReg(DestReg, RegState::Define | RegState::Implicit); 62 } 63 } else { 64 65 /* We can't copy vec4 registers */ 66 assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) 67 && 
!AMDGPU::R600_Reg128RegClass.contains(SrcReg)); 68 69 BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) 70 .addReg(SrcReg, getKillRegState(KillSrc)) 71 .addReg(0); // PREDICATE_BIT 72 } 73} 74 75MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, 76 unsigned DstReg, int64_t Imm) const 77{ 78 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); 79 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); 80 MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); 81 MachineInstrBuilder(MI).addImm(Imm); 82 MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT 83 84 return MI; 85} 86 87unsigned R600InstrInfo::getIEQOpcode() const 88{ 89 return AMDGPU::SETE_INT; 90} 91 92bool R600InstrInfo::isMov(unsigned Opcode) const 93{ 94 95 96 switch(Opcode) { 97 default: return false; 98 case AMDGPU::MOV: 99 case AMDGPU::MOV_IMM_F32: 100 case AMDGPU::MOV_IMM_I32: 101 return true; 102 } 103} 104 105// Some instructions act as place holders to emulate operations that the GPU 106// hardware does automatically. This function can be used to check if 107// an opcode falls into this category. 
108bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const 109{ 110 switch (opcode) { 111 default: return false; 112 case AMDGPU::RETURN: 113 case AMDGPU::LAST: 114 case AMDGPU::MASK_WRITE: 115 case AMDGPU::RESERVE_REG: 116 return true; 117 } 118} 119 120bool R600InstrInfo::isTexOp(unsigned opcode) const 121{ 122 switch(opcode) { 123 default: return false; 124 case AMDGPU::TEX_LD: 125 case AMDGPU::TEX_GET_TEXTURE_RESINFO: 126 case AMDGPU::TEX_SAMPLE: 127 case AMDGPU::TEX_SAMPLE_C: 128 case AMDGPU::TEX_SAMPLE_L: 129 case AMDGPU::TEX_SAMPLE_C_L: 130 case AMDGPU::TEX_SAMPLE_LB: 131 case AMDGPU::TEX_SAMPLE_C_LB: 132 case AMDGPU::TEX_SAMPLE_G: 133 case AMDGPU::TEX_SAMPLE_C_G: 134 case AMDGPU::TEX_GET_GRADIENTS_H: 135 case AMDGPU::TEX_GET_GRADIENTS_V: 136 case AMDGPU::TEX_SET_GRADIENTS_H: 137 case AMDGPU::TEX_SET_GRADIENTS_V: 138 return true; 139 } 140} 141 142bool R600InstrInfo::isReductionOp(unsigned opcode) const 143{ 144 switch(opcode) { 145 default: return false; 146 case AMDGPU::DOT4_r600: 147 case AMDGPU::DOT4_eg: 148 return true; 149 } 150} 151 152bool R600InstrInfo::isCubeOp(unsigned opcode) const 153{ 154 switch(opcode) { 155 default: return false; 156 case AMDGPU::CUBE_r600: 157 case AMDGPU::CUBE_eg: 158 return true; 159 } 160} 161 162 163bool R600InstrInfo::isFCOp(unsigned opcode) const 164{ 165 switch(opcode) { 166 default: return false; 167 case AMDGPU::BREAK_LOGICALZ_f32: 168 case AMDGPU::BREAK_LOGICALNZ_i32: 169 case AMDGPU::BREAK_LOGICALZ_i32: 170 case AMDGPU::BREAK_LOGICALNZ_f32: 171 case AMDGPU::CONTINUE_LOGICALNZ_f32: 172 case AMDGPU::IF_LOGICALNZ_i32: 173 case AMDGPU::IF_LOGICALZ_f32: 174 case AMDGPU::ELSE: 175 case AMDGPU::ENDIF: 176 case AMDGPU::ENDLOOP: 177 case AMDGPU::IF_LOGICALNZ_f32: 178 case AMDGPU::WHILELOOP: 179 return true; 180 } 181} 182 183DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 184 const ScheduleDAG *DAG) const 185{ 186 const InstrItineraryData *II = TM->getInstrItineraryData(); 187 return 
TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 188} 189 190static bool 191isPredicateSetter(unsigned opcode) 192{ 193 switch (opcode) { 194 case AMDGPU::PRED_X: 195 return true; 196 default: 197 return false; 198 } 199} 200 201static MachineInstr * 202findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 203 MachineBasicBlock::iterator I) 204{ 205 while (I != MBB.begin()) { 206 --I; 207 MachineInstr *MI = I; 208 if (isPredicateSetter(MI->getOpcode())) 209 return MI; 210 } 211 212 return NULL; 213} 214 215bool 216R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 217 MachineBasicBlock *&TBB, 218 MachineBasicBlock *&FBB, 219 SmallVectorImpl<MachineOperand> &Cond, 220 bool AllowModify) const 221{ 222 // Most of the following comes from the ARM implementation of AnalyzeBranch 223 224 // If the block has no terminators, it just falls into the block after it. 225 MachineBasicBlock::iterator I = MBB.end(); 226 if (I == MBB.begin()) 227 return false; 228 --I; 229 while (I->isDebugValue()) { 230 if (I == MBB.begin()) 231 return false; 232 --I; 233 } 234 if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { 235 return false; 236 } 237 238 // Get the last instruction in the block. 239 MachineInstr *LastInst = I; 240 241 // If there is only one terminator instruction, process it. 
242 unsigned LastOpc = LastInst->getOpcode(); 243 if (I == MBB.begin() || 244 static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { 245 if (LastOpc == AMDGPU::JUMP) { 246 if(!isPredicated(LastInst)) { 247 TBB = LastInst->getOperand(0).getMBB(); 248 return false; 249 } else { 250 MachineInstr *predSet = I; 251 while (!isPredicateSetter(predSet->getOpcode())) { 252 predSet = --I; 253 } 254 TBB = LastInst->getOperand(0).getMBB(); 255 Cond.push_back(predSet->getOperand(1)); 256 Cond.push_back(predSet->getOperand(2)); 257 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 258 return false; 259 } 260 } 261 return true; // Can't handle indirect branch. 262 } 263 264 // Get the instruction before it if it is a terminator. 265 MachineInstr *SecondLastInst = I; 266 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 267 268 // If the block ends with a B and a Bcc, handle it. 269 if (SecondLastOpc == AMDGPU::JUMP && 270 isPredicated(SecondLastInst) && 271 LastOpc == AMDGPU::JUMP && 272 !isPredicated(LastInst)) { 273 MachineInstr *predSet = --I; 274 while (!isPredicateSetter(predSet->getOpcode())) { 275 predSet = --I; 276 } 277 TBB = SecondLastInst->getOperand(0).getMBB(); 278 FBB = LastInst->getOperand(0).getMBB(); 279 Cond.push_back(predSet->getOperand(1)); 280 Cond.push_back(predSet->getOperand(2)); 281 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 282 return false; 283 } 284 285 // Otherwise, can't handle this. 286 return true; 287} 288 289int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 290 const MachineInstr *MI = op.getParent(); 291 292 switch (MI->getDesc().OpInfo->RegClass) { 293 default: // FIXME: fallthrough?? 
294 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; 295 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; 296 }; 297} 298 299unsigned 300R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, 301 MachineBasicBlock *TBB, 302 MachineBasicBlock *FBB, 303 const SmallVectorImpl<MachineOperand> &Cond, 304 DebugLoc DL) const 305{ 306 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 307 308 if (FBB == 0) { 309 if (Cond.empty()) { 310 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); 311 return 1; 312 } else { 313 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 314 assert(PredSet && "No previous predicate !"); 315 PredSet->getOperand(1).addTargetFlag(1<<4); 316 PredSet->getOperand(2).setImm(Cond[1].getImm()); 317 318 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 319 .addMBB(TBB) 320 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 321 return 1; 322 } 323 } else { 324 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 325 assert(PredSet && "No previous predicate !"); 326 PredSet->getOperand(1).addTargetFlag(1<<4); 327 PredSet->getOperand(2).setImm(Cond[1].getImm()); 328 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 329 .addMBB(TBB) 330 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 331 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); 332 return 2; 333 } 334} 335 336unsigned 337R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const 338{ 339 340 // Note : we leave PRED* instructions there. 341 // They may be needed when predicating instructions. 
342 343 MachineBasicBlock::iterator I = MBB.end(); 344 345 if (I == MBB.begin()) { 346 return 0; 347 } 348 --I; 349 switch (I->getOpcode()) { 350 default: 351 return 0; 352 case AMDGPU::JUMP: 353 if (isPredicated(I)) { 354 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 355 char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); 356 predSet->getOperand(1).setTargetFlags(flag); 357 } 358 I->eraseFromParent(); 359 break; 360 } 361 I = MBB.end(); 362 363 if (I == MBB.begin()) { 364 return 1; 365 } 366 --I; 367 switch (I->getOpcode()) { 368 // FIXME: only one case?? 369 default: 370 return 1; 371 case AMDGPU::JUMP: 372 if (isPredicated(I)) { 373 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 374 char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4)); 375 predSet->getOperand(1).setTargetFlags(flag); 376 } 377 I->eraseFromParent(); 378 break; 379 } 380 return 2; 381} 382 383bool 384R600InstrInfo::isPredicated(const MachineInstr *MI) const 385{ 386 int idx = MI->findFirstPredOperandIdx(); 387 if (idx < 0) 388 return false; 389 390 unsigned Reg = MI->getOperand(idx).getReg(); 391 switch (Reg) { 392 default: return false; 393 case AMDGPU::PRED_SEL_ONE: 394 case AMDGPU::PRED_SEL_ZERO: 395 case AMDGPU::PREDICATE_BIT: 396 return true; 397 } 398} 399 400bool 401R600InstrInfo::isPredicable(MachineInstr *MI) const 402{ 403 return AMDGPUInstrInfo::isPredicable(MI); 404} 405 406 407bool 408R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, 409 unsigned NumCyles, 410 unsigned ExtraPredCycles, 411 const BranchProbability &Probability) const{ 412 return true; 413} 414 415bool 416R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, 417 unsigned NumTCycles, 418 unsigned ExtraTCycles, 419 MachineBasicBlock &FMBB, 420 unsigned NumFCycles, 421 unsigned ExtraFCycles, 422 const BranchProbability &Probability) const 423{ 424 return true; 425} 426 427bool 428R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, 429 
unsigned NumCyles, 430 const BranchProbability &Probability) 431 const 432{ 433 return true; 434} 435 436bool 437R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 438 MachineBasicBlock &FMBB) const 439{ 440 return false; 441} 442 443 444bool 445R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const 446{ 447 MachineOperand &MO = Cond[1]; 448 switch (MO.getImm()) { 449 case OPCODE_IS_ZERO_INT: 450 MO.setImm(OPCODE_IS_NOT_ZERO_INT); 451 break; 452 case OPCODE_IS_NOT_ZERO_INT: 453 MO.setImm(OPCODE_IS_ZERO_INT); 454 break; 455 case OPCODE_IS_ZERO: 456 MO.setImm(OPCODE_IS_NOT_ZERO); 457 break; 458 case OPCODE_IS_NOT_ZERO: 459 MO.setImm(OPCODE_IS_ZERO); 460 break; 461 default: 462 return true; 463 } 464 465 MachineOperand &MO2 = Cond[2]; 466 switch (MO2.getReg()) { 467 case AMDGPU::PRED_SEL_ZERO: 468 MO2.setReg(AMDGPU::PRED_SEL_ONE); 469 break; 470 case AMDGPU::PRED_SEL_ONE: 471 MO2.setReg(AMDGPU::PRED_SEL_ZERO); 472 break; 473 default: 474 return true; 475 } 476 return false; 477} 478 479bool 480R600InstrInfo::DefinesPredicate(MachineInstr *MI, 481 std::vector<MachineOperand> &Pred) const 482{ 483 return isPredicateSetter(MI->getOpcode()); 484} 485 486 487bool 488R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 489 const SmallVectorImpl<MachineOperand> &Pred2) const 490{ 491 return false; 492} 493 494 495bool 496R600InstrInfo::PredicateInstruction(MachineInstr *MI, 497 const SmallVectorImpl<MachineOperand> &Pred) const 498{ 499 int PIdx = MI->findFirstPredOperandIdx(); 500 501 if (PIdx != -1) { 502 MachineOperand &PMO = MI->getOperand(PIdx); 503 PMO.setReg(Pred[2].getReg()); 504 MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 505 return true; 506 } 507 508 return false; 509} 510 511int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 512 const MachineInstr *MI, 513 unsigned *PredCost) const 514{ 515 if (PredCost) 516 *PredCost = 2; 517 return 2; 518} 519