R600InstrInfo.cpp revision 90bd1d52bbf95947955a66ec67f5f6c7dc87119a
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// R600 Implementation of TargetInstrInfo. 11// 12//===----------------------------------------------------------------------===// 13 14#include "R600InstrInfo.h" 15#include "AMDGPUTargetMachine.h" 16#include "AMDGPUSubtarget.h" 17#include "R600Defines.h" 18#include "R600RegisterInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "AMDILUtilityFunctions.h" 21 22#define GET_INSTRINFO_CTOR 23#include "AMDGPUGenDFAPacketizer.inc" 24 25using namespace llvm; 26 27R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) 28 : AMDGPUInstrInfo(tm), 29 RI(tm, *this), 30 TM(tm) 31 { } 32 33const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const 34{ 35 return RI; 36} 37 38bool R600InstrInfo::isTrig(const MachineInstr &MI) const 39{ 40 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; 41} 42 43bool R600InstrInfo::isVector(const MachineInstr &MI) const 44{ 45 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; 46} 47 48void 49R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, 50 MachineBasicBlock::iterator MI, DebugLoc DL, 51 unsigned DestReg, unsigned SrcReg, 52 bool KillSrc) const 53{ 54 if (AMDGPU::R600_Reg128RegClass.contains(DestReg) 55 && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { 56 for (unsigned i = 0; i < 4; i++) { 57 unsigned SubRegIndex = RI.getSubRegFromChannel(i); 58 BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) 59 .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define) 60 .addReg(RI.getSubReg(SrcReg, SubRegIndex)) 61 .addImm(0) // Flag 62 .addReg(0) // PREDICATE_BIT 63 .addReg(DestReg, RegState::Define | RegState::Implicit); 64 } 65 } else { 66 67 /* We can't copy vec4 registers */ 68 assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) 69 && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); 70 71 BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) 72 .addReg(SrcReg, getKillRegState(KillSrc)) 73 .addImm(0) // Flag 74 .addReg(0); // PREDICATE_BIT 75 } 76} 77 78MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, 79 unsigned DstReg, int64_t Imm) const 80{ 81 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); 82 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); 83 MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); 84 MachineInstrBuilder(MI).addImm(Imm); 85 MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT 86 87 return MI; 88} 89 90unsigned R600InstrInfo::getIEQOpcode() const 91{ 92 return AMDGPU::SETE_INT; 93} 94 95bool R600InstrInfo::isMov(unsigned Opcode) const 96{ 97 98 99 switch(Opcode) { 100 default: return false; 101 case AMDGPU::MOV: 102 case AMDGPU::MOV_IMM_F32: 103 case AMDGPU::MOV_IMM_I32: 104 return true; 105 } 106} 107 108// Some instructions act as place holders to emulate operations that the GPU 109// hardware does automatically. This function can be used to check if 110// an opcode falls into this category. 111bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const 112{ 113 switch (opcode) { 114 default: return false; 115 case AMDGPU::RETURN: 116 case AMDGPU::LAST: 117 case AMDGPU::MASK_WRITE: 118 case AMDGPU::RESERVE_REG: 119 return true; 120 } 121} 122 123bool R600InstrInfo::isReductionOp(unsigned opcode) const 124{ 125 switch(opcode) { 126 default: return false; 127 case AMDGPU::DOT4_r600: 128 case AMDGPU::DOT4_eg: 129 return true; 130 } 131} 132 133bool R600InstrInfo::isCubeOp(unsigned opcode) const 134{ 135 switch(opcode) { 136 default: return false; 137 case AMDGPU::CUBE_r600_pseudo: 138 case AMDGPU::CUBE_r600_real: 139 case AMDGPU::CUBE_eg_pseudo: 140 case AMDGPU::CUBE_eg_real: 141 return true; 142 } 143} 144 145DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, 146 const ScheduleDAG *DAG) const 147{ 148 const InstrItineraryData *II = TM->getInstrItineraryData(); 149 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II); 150} 151 152static bool 153isPredicateSetter(unsigned opcode) 154{ 155 switch (opcode) { 156 case AMDGPU::PRED_X: 157 return true; 158 default: 159 return false; 160 } 161} 162 163static MachineInstr * 164findFirstPredicateSetterFrom(MachineBasicBlock &MBB, 165 MachineBasicBlock::iterator I) 166{ 167 while (I != MBB.begin()) { 168 --I; 169 MachineInstr *MI = I; 170 if (isPredicateSetter(MI->getOpcode())) 171 return MI; 172 } 173 174 return NULL; 175} 176 177bool 178R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 179 MachineBasicBlock *&TBB, 180 MachineBasicBlock *&FBB, 181 SmallVectorImpl<MachineOperand> &Cond, 182 bool AllowModify) const 183{ 184 // Most of the following comes from the ARM implementation of AnalyzeBranch 185 186 // If the block has no terminators, it just falls into the block after it. 187 MachineBasicBlock::iterator I = MBB.end(); 188 if (I == MBB.begin()) 189 return false; 190 --I; 191 while (I->isDebugValue()) { 192 if (I == MBB.begin()) 193 return false; 194 --I; 195 } 196 if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { 197 return false; 198 } 199 200 // Get the last instruction in the block. 201 MachineInstr *LastInst = I; 202 203 // If there is only one terminator instruction, process it. 204 unsigned LastOpc = LastInst->getOpcode(); 205 if (I == MBB.begin() || 206 static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { 207 if (LastOpc == AMDGPU::JUMP) { 208 if(!isPredicated(LastInst)) { 209 TBB = LastInst->getOperand(0).getMBB(); 210 return false; 211 } else { 212 MachineInstr *predSet = I; 213 while (!isPredicateSetter(predSet->getOpcode())) { 214 predSet = --I; 215 } 216 TBB = LastInst->getOperand(0).getMBB(); 217 Cond.push_back(predSet->getOperand(1)); 218 Cond.push_back(predSet->getOperand(2)); 219 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 220 return false; 221 } 222 } 223 return true; // Can't handle indirect branch. 224 } 225 226 // Get the instruction before it if it is a terminator. 227 MachineInstr *SecondLastInst = I; 228 unsigned SecondLastOpc = SecondLastInst->getOpcode(); 229 230 // If the block ends with a B and a Bcc, handle it. 231 if (SecondLastOpc == AMDGPU::JUMP && 232 isPredicated(SecondLastInst) && 233 LastOpc == AMDGPU::JUMP && 234 !isPredicated(LastInst)) { 235 MachineInstr *predSet = --I; 236 while (!isPredicateSetter(predSet->getOpcode())) { 237 predSet = --I; 238 } 239 TBB = SecondLastInst->getOperand(0).getMBB(); 240 FBB = LastInst->getOperand(0).getMBB(); 241 Cond.push_back(predSet->getOperand(1)); 242 Cond.push_back(predSet->getOperand(2)); 243 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); 244 return false; 245 } 246 247 // Otherwise, can't handle this. 248 return true; 249} 250 251int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { 252 const MachineInstr *MI = op.getParent(); 253 254 switch (MI->getDesc().OpInfo->RegClass) { 255 default: // FIXME: fallthrough?? 256 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; 257 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; 258 }; 259} 260 261unsigned 262R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, 263 MachineBasicBlock *TBB, 264 MachineBasicBlock *FBB, 265 const SmallVectorImpl<MachineOperand> &Cond, 266 DebugLoc DL) const 267{ 268 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 269 270 if (FBB == 0) { 271 if (Cond.empty()) { 272 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); 273 return 1; 274 } else { 275 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 276 assert(PredSet && "No previous predicate !"); 277 AddFlag(PredSet, 1, MO_FLAG_PUSH); 278 PredSet->getOperand(2).setImm(Cond[1].getImm()); 279 280 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 281 .addMBB(TBB) 282 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 283 return 1; 284 } 285 } else { 286 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); 287 assert(PredSet && "No previous predicate !"); 288 AddFlag(PredSet, 1, MO_FLAG_PUSH); 289 PredSet->getOperand(2).setImm(Cond[1].getImm()); 290 BuildMI(&MBB, DL, get(AMDGPU::JUMP)) 291 .addMBB(TBB) 292 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 293 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); 294 return 2; 295 } 296} 297 298unsigned 299R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const 300{ 301 302 // Note : we leave PRED* instructions there. 303 // They may be needed when predicating instructions. 304 305 MachineBasicBlock::iterator I = MBB.end(); 306 307 if (I == MBB.begin()) { 308 return 0; 309 } 310 --I; 311 switch (I->getOpcode()) { 312 default: 313 return 0; 314 case AMDGPU::JUMP: 315 if (isPredicated(I)) { 316 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 317 ClearFlag(predSet, 1, MO_FLAG_PUSH); 318 } 319 I->eraseFromParent(); 320 break; 321 } 322 I = MBB.end(); 323 324 if (I == MBB.begin()) { 325 return 1; 326 } 327 --I; 328 switch (I->getOpcode()) { 329 // FIXME: only one case?? 330 default: 331 return 1; 332 case AMDGPU::JUMP: 333 if (isPredicated(I)) { 334 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); 335 ClearFlag(predSet, 1, MO_FLAG_PUSH); 336 } 337 I->eraseFromParent(); 338 break; 339 } 340 return 2; 341} 342 343bool 344R600InstrInfo::isPredicated(const MachineInstr *MI) const 345{ 346 int idx = MI->findFirstPredOperandIdx(); 347 if (idx < 0) 348 return false; 349 350 unsigned Reg = MI->getOperand(idx).getReg(); 351 switch (Reg) { 352 default: return false; 353 case AMDGPU::PRED_SEL_ONE: 354 case AMDGPU::PRED_SEL_ZERO: 355 case AMDGPU::PREDICATE_BIT: 356 return true; 357 } 358} 359 360bool 361R600InstrInfo::isPredicable(MachineInstr *MI) const 362{ 363 return AMDGPUInstrInfo::isPredicable(MI); 364} 365 366 367bool 368R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, 369 unsigned NumCyles, 370 unsigned ExtraPredCycles, 371 const BranchProbability &Probability) const{ 372 return true; 373} 374 375bool 376R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, 377 unsigned NumTCycles, 378 unsigned ExtraTCycles, 379 MachineBasicBlock &FMBB, 380 unsigned NumFCycles, 381 unsigned ExtraFCycles, 382 const BranchProbability &Probability) const 383{ 384 return true; 385} 386 387bool 388R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, 389 unsigned NumCyles, 390 const BranchProbability &Probability) 391 const 392{ 393 return true; 394} 395 396bool 397R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, 398 MachineBasicBlock &FMBB) const 399{ 400 return false; 401} 402 403 404bool 405R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const 406{ 407 MachineOperand &MO = Cond[1]; 408 switch (MO.getImm()) { 409 case OPCODE_IS_ZERO_INT: 410 MO.setImm(OPCODE_IS_NOT_ZERO_INT); 411 break; 412 case OPCODE_IS_NOT_ZERO_INT: 413 MO.setImm(OPCODE_IS_ZERO_INT); 414 break; 415 case OPCODE_IS_ZERO: 416 MO.setImm(OPCODE_IS_NOT_ZERO); 417 break; 418 case OPCODE_IS_NOT_ZERO: 419 MO.setImm(OPCODE_IS_ZERO); 420 break; 421 default: 422 return true; 423 } 424 425 MachineOperand &MO2 = Cond[2]; 426 switch (MO2.getReg()) { 427 case AMDGPU::PRED_SEL_ZERO: 428 MO2.setReg(AMDGPU::PRED_SEL_ONE); 429 break; 430 case AMDGPU::PRED_SEL_ONE: 431 MO2.setReg(AMDGPU::PRED_SEL_ZERO); 432 break; 433 default: 434 return true; 435 } 436 return false; 437} 438 439bool 440R600InstrInfo::DefinesPredicate(MachineInstr *MI, 441 std::vector<MachineOperand> &Pred) const 442{ 443 return isPredicateSetter(MI->getOpcode()); 444} 445 446 447bool 448R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, 449 const SmallVectorImpl<MachineOperand> &Pred2) const 450{ 451 return false; 452} 453 454 455bool 456R600InstrInfo::PredicateInstruction(MachineInstr *MI, 457 const SmallVectorImpl<MachineOperand> &Pred) const 458{ 459 int PIdx = MI->findFirstPredOperandIdx(); 460 461 if (PIdx != -1) { 462 MachineOperand &PMO = MI->getOperand(PIdx); 463 PMO.setReg(Pred[2].getReg()); 464 MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); 465 return true; 466 } 467 468 return false; 469} 470 471int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, 472 const MachineInstr *MI, 473 unsigned *PredCost) const 474{ 475 if (PredCost) 476 *PredCost = 2; 477 return 2; 478} 479 480//===----------------------------------------------------------------------===// 481// Instruction flag getters/setters 482//===----------------------------------------------------------------------===// 483 484bool R600InstrInfo::HasFlagOperand(const MachineInstr &MI) const 485{ 486 return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0; 487} 488 489MachineOperand &R600InstrInfo::GetFlagOp(MachineInstr *MI) const 490{ 491 unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags); 492 assert(FlagIndex != 0 && 493 "Instruction flags not supported for this instruction"); 494 MachineOperand &FlagOp = MI->getOperand(FlagIndex); 495 assert(FlagOp.isImm()); 496 return FlagOp; 497} 498 499void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand, 500 unsigned Flag) const 501{ 502 MachineOperand &FlagOp = GetFlagOp(MI); 503 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); 504} 505 506void R600InstrInfo::ClearFlag(MachineInstr *MI, unsigned Operand, 507 unsigned Flag) const 508{ 509 MachineOperand &FlagOp = GetFlagOp(MI); 510 unsigned InstFlags = FlagOp.getImm(); 511 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); 512 FlagOp.setImm(InstFlags); 513} 514