R600InstrInfo.cpp revision a614979286f8d329af318c1e9fb067e17cab4315
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
                           AMDGPU::sel_z, AMDGPU::sel_w};

  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    for (unsigned i = 0; i < 4; i++) {
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
              .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
              .addReg(RI.getSubReg(SrcReg, subRegMap[i]))
              .addReg(0) // PREDICATE_BIT
              .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {

    /* We can't copy vec4 registers */
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addReg(0); // PREDICATE_BIT
  }
}

MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                             unsigned DstReg, int64_t Imm) const
{
  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
  MachineInstrBuilder(MI).addImm(Imm);
  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const
{
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const
{
  switch(Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const
{
  switch (opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::LAST:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isTexOp(unsigned opcode) const
{
  switch(opcode) {
  default: return false;
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned opcode) const
{
  switch(opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned opcode) const
{
  switch(opcode) {
  default: return false;
  case AMDGPU::CUBE_r600:
  case AMDGPU::CUBE_eg:
    return true;
  }
}


bool R600InstrInfo::isFCOp(unsigned opcode) const
{
  switch(opcode) {
  default: return false;
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::WHILELOOP:
    return true;
  }
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
{
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned opcode)
{
  switch (opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

// Walk backwards from I to the closest preceding predicate setter (PRED_X)
// in MBB; returns NULL if none is found.
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a predicated JUMP followed by an unpredicated
  // JUMP (a conditional branch plus an unconditional branch), handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      PredSet->getOperand(1).addTargetFlag(1<<4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
           .addMBB(TBB)
           .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}


bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const{
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
                                         const
{
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const
{
  return false;
}


bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const
{
  return isPredicateSetter(MI->getOpcode());
}


bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const
{
  return false;
}


bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const
{
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const
{
  if (PredCost)
    *PredCost = 2;
  return 2;
}