R600InstrInfo.cpp revision 1cb07bd3b8abd5e52e9dbd80bb1666058545387e
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "AMDILUtilityFunctions.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

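// Copy one physical register to another. There is no single instruction that
// copies a whole 128-bit (vec4) register, so such copies are expanded into
// four per-channel MOVs of the X/Y/Z/W sub-registers; each MOV also carries
// an implicit def of the full destination register so the vec4 value is seen
// as defined as a whole.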
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    for (unsigned i = 0; i < 4; i++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(i);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
              .addReg(0) // PREDICATE_BIT
              .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {
    // A single MOV can only copy 32-bit registers, so neither operand may be
    // a vec4 register at this point.
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc))
      .addReg(0); // PREDICATE_BIT
  }
}

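// Build (but do not insert) a MOV of an immediate: the literal value is
// carried alongside the ALU_LITERAL_X source operand, and the trailing zero
// register operand is the unused predicate slot.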
MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg, int64_t Imm) const
{
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define)
                         .addReg(AMDGPU::ALU_LITERAL_X)
                         .addImm(Imm)
                         .addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const
{
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as place holders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
  case AMDGPU::LAST:
  case AMDGPU::MASK_WRITE:
  case AMDGPU::RESERVE_REG:
    return true;
  }
}

bool R600InstrInfo::isTexOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::DOT4_r600:
  case AMDGPU::DOT4_eg:
    return true;
  }
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isFCOp(unsigned Opcode) const
{
  switch (Opcode) {
  default: return false;
  case AMDGPU::BREAK_LOGICALZ_f32:
  case AMDGPU::BREAK_LOGICALNZ_i32:
  case AMDGPU::BREAK_LOGICALZ_i32:
  case AMDGPU::BREAK_LOGICALNZ_f32:
  case AMDGPU::CONTINUE_LOGICALNZ_f32:
  case AMDGPU::IF_LOGICALNZ_i32:
  case AMDGPU::IF_LOGICALZ_f32:
  case AMDGPU::ELSE:
  case AMDGPU::ENDIF:
  case AMDGPU::ENDLOOP:
  case AMDGPU::IF_LOGICALNZ_f32:
  case AMDGPU::WHILELOOP:
    return true;
  }
}

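// Create the DFA-based packetizer state used by the VLIW scheduler; the DFA
// tables themselves come from AMDGPUGenDFAPacketizer.inc, included above.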
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const
{
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

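// PRED_X is currently the only opcode that writes the predicate register.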
static bool
isPredicateSetter(unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

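// Walk backwards from I and return the closest preceding predicate setter in
// MBB, or NULL if there is none before I.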
static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I)
{
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

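// Analyze the terminators of MBB. Two forms are recognized:
//
//   JUMP <TBB>                ; one unpredicated JUMP: unconditional branch
//
//   JUMP <TBB>  (predicated)  ; predicated JUMP followed by an unpredicated
//   JUMP <FBB>                ; JUMP: conditional branch plus fallthrough
//
// For conditional branches, Cond receives three operands taken from the
// preceding predicate setter: its flag-carrying operand, its condition-code
// immediate, and a PRED_SEL register selecting which predicate value takes
// the branch.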
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (I->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || (--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if (!isPredicated(LastInst)) {
        // An unpredicated JUMP is an unconditional branch to TBB.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // A predicated JUMP is a conditional branch; recover the condition
        // from the predicate setter that feeds it.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a predicated JUMP followed by an unpredicated
  // JUMP (a conditional branch and its fallthrough branch), handle it.
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

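// Pick the branch pseudo-instruction that matches the register class of the
// condition operand (integer or float).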
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

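// Insert an unconditional (Cond empty) or conditional branch at the end of
// MBB. For conditional branches the most recent predicate setter is updated
// in place: the (1 << 4) target flag (which appears to mark the predicate as
// pushed) is set on its flag operand, its condition-code immediate is
// overwritten with Cond[1], and the JUMP then consumes PREDICATE_BIT.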
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate!");
      PredSet->getOperand(1).addTargetFlag(1 << 4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate!");
    PredSet->getOperand(1).addTargetFlag(1 << 4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}

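// Remove up to two JUMP terminators from the end of MBB and return how many
// were erased. Predicate setters are intentionally left in place (they may
// still be needed to predicate instructions), but the (1 << 4) target flag
// set by InsertBranch is cleared from them.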
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{
  // Note: we leave PRED* instructions in place.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & ~(1 << 4);
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
  // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & ~(1 << 4);
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}

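// An instruction is predicated if its first predicate operand carries one of
// the predicate registers (PRED_SEL_ZERO, PRED_SEL_ONE, or PREDICATE_BIT).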
bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const
{
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}

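// If-conversion heuristics: these hooks unconditionally report predication
// as profitable (branches are comparatively expensive on R600) and never
// report unpredication as profitable.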
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability) const
{
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const
{
  return false;
}

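// Reverse a branch condition produced by AnalyzeBranch by inverting both the
// comparison opcode (zero <-> not-zero, in the int and float forms) and the
// predicate select register (PRED_SEL_ZERO <-> PRED_SEL_ONE). Returns true
// if the condition cannot be reversed.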
bool
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
{
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const
{
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2) const
{
  return false;
}

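// Convert MI into a predicated instruction by writing the predicate-select
// register from Pred into MI's predicate operand and adding an implicit use
// of PREDICATE_BIT.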
bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred) const
{
  int PIdx = MI->findFirstPredOperandIdx();

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

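// All R600 instructions are currently modeled with a uniform latency of two
// cycles, independent of the itinerary data.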
int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                   const MachineInstr *MI,
                                   unsigned *PredCost) const
{
  if (PredCost)
    *PredCost = 2;
  return 2;
}