//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "AMDILUtilityFunctions.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    for (unsigned I = 0; I < 4; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
        .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
        .addReg(RI.getSubReg(SrcReg, SubRegIndex))
        .addImm(0) // Flag
        .addReg(0) // PREDICATE_BIT
        .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    // We can't copy vec4 registers here.
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addImm(0) // Flag
      .addReg(0); // PREDICATE_BIT
  }
}
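// Illustration (hypothetical registers): a 128-bit copy such as
// T0.XYZW <- T1.XYZW is expanded by the loop above into four per-channel
// scalar moves, roughly:
//   MOV T0.X, T1.X
//   MOV T0.Y, T1.Y
//   MOV T0.Z, T1.Z
//   MOV T0.W, T1.W
// with each MOV carrying the extra Flag, PREDICATE_BIT, and implicit-def
// operands added above.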
MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg, int64_t Imm) const
{
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
  MachineInstrBuilder(MI).addImm(Imm);
  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const
{
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
{
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}
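// AnalyzeBranch recognizes the two terminator layouts this backend emits: a
// single JUMP (unconditional, or predicated by a preceding PRED_X), or a
// predicated JUMP followed by an unconditional JUMP. For a predicated branch
// the condition is rebuilt from the PRED_X that sets the predicate. A sketch
// of the two-terminator shape (block names are illustrative only):
//   PRED_X  ...                     ; predicate setter
//   JUMP    %bb.true  <predicated>  ; conditional branch -> TBB
//   JUMP    %bb.false               ; unconditional branch -> FBB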
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a predicated JUMP followed by an unconditional
  // JUMP, handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate!");
      addFlag(PredSet, 1, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
        .addMBB(TBB)
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate!");
    addFlag(PredSet, 1, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
      .addMBB(TBB)
      .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
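// RemoveBranch mirrors the shapes AnalyzeBranch accepts: it erases up to two
// trailing JUMP terminators and returns how many were removed. When a
// predicated JUMP is erased, the MO_FLAG_PUSH flag that InsertBranch set on
// the matching predicate setter is cleared again.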
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      clearFlag(predSet, 1, MO_FLAG_PUSH);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability)
                                         const
{
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const
{
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const
{
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const
{
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const
{
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const
{
  if (PredCost)
    *PredCost = 2;
  return 2;
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//
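// Per-operand flags are packed into one immediate "flag operand" on the
// instruction: operand N owns the NUM_MO_FLAGS-wide bit slot starting at bit
// N * NUM_MO_FLAGS. For example, addFlag(MI, 1, MO_FLAG_PUSH) ORs
// MO_FLAG_PUSH shifted into operand 1's slot, and clearFlag(MI, 1,
// MO_FLAG_PUSH) masks the same bits back out. GET_FLAG_OPERAND_IDX yields 0
// for instructions without a flag operand, which is why 0 doubles as the
// "unsupported" value below.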
bool
R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
{
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
{
  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
  assert(FlagIndex != 0 &&
         "Instruction flags not supported for this instruction");
  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const
{
  MachineOperand &FlagOp = getFlagOp(MI);
  unsigned InstFlags = FlagOp.getImm();
  InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
  FlagOp.setImm(InstFlags);
}
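// Usage elsewhere in this file: InsertBranch marks the predicate setter that
// feeds a conditional JUMP via addFlag(PredSet, 1, MO_FLAG_PUSH), and
// RemoveBranch undoes it with clearFlag(PredSet, 1, MO_FLAG_PUSH) when the
// branch is erased.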