R600InstrInfo.cpp revision 05882985757e655f5298af483c881008d45e6249
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600InstrInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPUSubtarget.h"
17#include "R600RegisterInfo.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "AMDILUtilityFunctions.h"
20
21#define GET_INSTRINFO_CTOR
22#include "AMDGPUGenDFAPacketizer.inc"
23
24using namespace llvm;
25
26R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
27  : AMDGPUInstrInfo(tm),
28    RI(tm, *this),
29    TM(tm)
30  { }
31
32const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
33{
34  return RI;
35}
36
37bool R600InstrInfo::isTrig(const MachineInstr &MI) const
38{
39  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
40}
41
42bool R600InstrInfo::isVector(const MachineInstr &MI) const
43{
44  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
45}
46
47void
48R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
49                           MachineBasicBlock::iterator MI, DebugLoc DL,
50                           unsigned DestReg, unsigned SrcReg,
51                           bool KillSrc) const
52{
53  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
54      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
55    for (unsigned i = 0; i < 4; i++) {
56      unsigned SubRegIndex = RI.getSubRegFromChannel(i);
57      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
58              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
59              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
60              .addReg(0) // PREDICATE_BIT
61              .addReg(DestReg, RegState::Define | RegState::Implicit);
62    }
63  } else {
64
65    /* We can't copy vec4 registers */
66    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
67           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
68
69    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
70      .addReg(SrcReg, getKillRegState(KillSrc))
71      .addReg(0); // PREDICATE_BIT
72  }
73}
74
75MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
76                                             unsigned DstReg, int64_t Imm) const
77{
78  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
79  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
80  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
81  MachineInstrBuilder(MI).addImm(Imm);
82  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
83
84  return MI;
85}
86
87unsigned R600InstrInfo::getIEQOpcode() const
88{
89  return AMDGPU::SETE_INT;
90}
91
92bool R600InstrInfo::isMov(unsigned Opcode) const
93{
94
95
96  switch(Opcode) {
97  default: return false;
98  case AMDGPU::MOV:
99  case AMDGPU::MOV_IMM_F32:
100  case AMDGPU::MOV_IMM_I32:
101    return true;
102  }
103}
104
105// Some instructions act as place holders to emulate operations that the GPU
106// hardware does automatically. This function can be used to check if
107// an opcode falls into this category.
108bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const
109{
110  switch (opcode) {
111  default: return false;
112  case AMDGPU::RETURN:
113  case AMDGPU::LAST:
114  case AMDGPU::MASK_WRITE:
115  case AMDGPU::RESERVE_REG:
116    return true;
117  }
118}
119
120bool R600InstrInfo::isTexOp(unsigned opcode) const
121{
122  switch(opcode) {
123  default: return false;
124  case AMDGPU::TEX_LD:
125  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
126  case AMDGPU::TEX_SAMPLE:
127  case AMDGPU::TEX_SAMPLE_C:
128  case AMDGPU::TEX_SAMPLE_L:
129  case AMDGPU::TEX_SAMPLE_C_L:
130  case AMDGPU::TEX_SAMPLE_LB:
131  case AMDGPU::TEX_SAMPLE_C_LB:
132  case AMDGPU::TEX_SAMPLE_G:
133  case AMDGPU::TEX_SAMPLE_C_G:
134  case AMDGPU::TEX_GET_GRADIENTS_H:
135  case AMDGPU::TEX_GET_GRADIENTS_V:
136  case AMDGPU::TEX_SET_GRADIENTS_H:
137  case AMDGPU::TEX_SET_GRADIENTS_V:
138    return true;
139  }
140}
141
142bool R600InstrInfo::isReductionOp(unsigned opcode) const
143{
144  switch(opcode) {
145    default: return false;
146    case AMDGPU::DOT4_r600:
147    case AMDGPU::DOT4_eg:
148      return true;
149  }
150}
151
152bool R600InstrInfo::isCubeOp(unsigned opcode) const
153{
154  switch(opcode) {
155    default: return false;
156    case AMDGPU::CUBE_r600:
157    case AMDGPU::CUBE_eg:
158      return true;
159  }
160}
161
162
163bool R600InstrInfo::isFCOp(unsigned opcode) const
164{
165  switch(opcode) {
166  default: return false;
167  case AMDGPU::BREAK_LOGICALZ_f32:
168  case AMDGPU::BREAK_LOGICALNZ_i32:
169  case AMDGPU::BREAK_LOGICALZ_i32:
170  case AMDGPU::BREAK_LOGICALNZ_f32:
171  case AMDGPU::CONTINUE_LOGICALNZ_f32:
172  case AMDGPU::IF_LOGICALNZ_i32:
173  case AMDGPU::IF_LOGICALZ_f32:
174  case AMDGPU::ELSE:
175  case AMDGPU::ENDIF:
176  case AMDGPU::ENDLOOP:
177  case AMDGPU::IF_LOGICALNZ_f32:
178  case AMDGPU::WHILELOOP:
179    return true;
180  }
181}
182
183DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
184    const ScheduleDAG *DAG) const
185{
186  const InstrItineraryData *II = TM->getInstrItineraryData();
187  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
188}
189
190static bool
191isPredicateSetter(unsigned opcode)
192{
193  switch (opcode) {
194  case AMDGPU::PRED_X:
195    return true;
196  default:
197    return false;
198  }
199}
200
201static MachineInstr *
202findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
203                             MachineBasicBlock::iterator I)
204{
205  while (I != MBB.begin()) {
206    --I;
207    MachineInstr *MI = I;
208    if (isPredicateSetter(MI->getOpcode()))
209      return MI;
210  }
211
212  return NULL;
213}
214
// Analyze the terminators of MBB for the branch folder / if-converter.
// Returns false when the branch structure was understood (TBB/FBB/Cond
// filled in), true when it could not be analyzed.  Only AMDGPU::JUMP
// terminators are recognized; a predicated JUMP is described by the
// PRED_X that precedes it.  On success with a conditional branch, Cond
// holds: [0] = PRED_X operand 1 (the operand InsertBranch tags with the
// branch flag), [1] = PRED_X operand 2 (compare-opcode immediate, see
// ReverseBranchCondition), [2] = the PRED_SEL_ONE register.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip any trailing debug values; they are not terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // A non-JUMP last instruction means the block simply falls through.
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Unconditional branch: destination only, no condition.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: scan backwards for the PRED_X feeding it.
        // NOTE(review): assumes a predicate setter exists before the jump;
        // the loop would walk past MBB.begin() otherwise.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  // (Predicated JUMP to TBB followed by unconditional JUMP to FBB.)
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
288
289int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
290  const MachineInstr *MI = op.getParent();
291
292  switch (MI->getDesc().OpInfo->RegClass) {
293  default: // FIXME: fallthrough??
294  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
295  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
296  };
297}
298
// Insert branch instruction(s) at the end of MBB and return how many were
// added.  An empty Cond inserts an unconditional JUMP to TBB.  Otherwise
// the most recent PRED_X in the block is rewritten to carry the condition:
// its operand 1 gets the (1<<4) target flag (cleared again in RemoveBranch)
// and its operand 2 is set to the compare opcode from Cond[1]; the JUMP
// then consumes (kills) PREDICATE_BIT.  With both TBB and FBB, a predicated
// JUMP to TBB is followed by an unconditional JUMP to FBB.
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch; trailing reg 0 is the (unused) predicate bit.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Conditional branch: hook the condition onto the last PRED_X.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      PredSet->getOperand(1).addTargetFlag(1<<4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Two-way branch: predicated JUMP to TBB, then plain JUMP to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
335
336unsigned
337R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
338{
339
340  // Note : we leave PRED* instructions there.
341  // They may be needed when predicating instructions.
342
343  MachineBasicBlock::iterator I = MBB.end();
344
345  if (I == MBB.begin()) {
346    return 0;
347  }
348  --I;
349  switch (I->getOpcode()) {
350  default:
351    return 0;
352  case AMDGPU::JUMP:
353    if (isPredicated(I)) {
354      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
355      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
356      predSet->getOperand(1).setTargetFlags(flag);
357    }
358    I->eraseFromParent();
359    break;
360  }
361  I = MBB.end();
362
363  if (I == MBB.begin()) {
364    return 1;
365  }
366  --I;
367  switch (I->getOpcode()) {
368    // FIXME: only one case??
369  default:
370    return 1;
371  case AMDGPU::JUMP:
372    if (isPredicated(I)) {
373      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
374      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
375      predSet->getOperand(1).setTargetFlags(flag);
376    }
377    I->eraseFromParent();
378    break;
379  }
380  return 2;
381}
382
383bool
384R600InstrInfo::isPredicated(const MachineInstr *MI) const
385{
386  int idx = MI->findFirstPredOperandIdx();
387  if (idx < 0)
388    return false;
389
390  unsigned Reg = MI->getOperand(idx).getReg();
391  switch (Reg) {
392  default: return false;
393  case AMDGPU::PRED_SEL_ONE:
394  case AMDGPU::PRED_SEL_ZERO:
395  case AMDGPU::PREDICATE_BIT:
396    return true;
397  }
398}
399
400bool
401R600InstrInfo::isPredicable(MachineInstr *MI) const
402{
403  return AMDGPUInstrInfo::isPredicable(MI);
404}
405
406
407bool
408R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
409                                   unsigned NumCyles,
410                                   unsigned ExtraPredCycles,
411                                   const BranchProbability &Probability) const{
412  return true;
413}
414
415bool
416R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
417                                   unsigned NumTCycles,
418                                   unsigned ExtraTCycles,
419                                   MachineBasicBlock &FMBB,
420                                   unsigned NumFCycles,
421                                   unsigned ExtraFCycles,
422                                   const BranchProbability &Probability) const
423{
424  return true;
425}
426
427bool
428R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
429                                         unsigned NumCyles,
430                                         const BranchProbability &Probability)
431                                         const
432{
433  return true;
434}
435
436bool
437R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
438                                         MachineBasicBlock &FMBB) const
439{
440  return false;
441}
442
443
444bool
445R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
446{
447  MachineOperand &MO = Cond[1];
448  switch (MO.getImm()) {
449  case OPCODE_IS_ZERO_INT:
450    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
451    break;
452  case OPCODE_IS_NOT_ZERO_INT:
453    MO.setImm(OPCODE_IS_ZERO_INT);
454    break;
455  case OPCODE_IS_ZERO:
456    MO.setImm(OPCODE_IS_NOT_ZERO);
457    break;
458  case OPCODE_IS_NOT_ZERO:
459    MO.setImm(OPCODE_IS_ZERO);
460    break;
461  default:
462    return true;
463  }
464
465  MachineOperand &MO2 = Cond[2];
466  switch (MO2.getReg()) {
467  case AMDGPU::PRED_SEL_ZERO:
468    MO2.setReg(AMDGPU::PRED_SEL_ONE);
469    break;
470  case AMDGPU::PRED_SEL_ONE:
471    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
472    break;
473  default:
474    return true;
475  }
476  return false;
477}
478
479bool
480R600InstrInfo::DefinesPredicate(MachineInstr *MI,
481                                std::vector<MachineOperand> &Pred) const
482{
483  return isPredicateSetter(MI->getOpcode());
484}
485
486
487bool
488R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
489                       const SmallVectorImpl<MachineOperand> &Pred2) const
490{
491  return false;
492}
493
494
495bool
496R600InstrInfo::PredicateInstruction(MachineInstr *MI,
497                      const SmallVectorImpl<MachineOperand> &Pred) const
498{
499  int PIdx = MI->findFirstPredOperandIdx();
500
501  if (PIdx != -1) {
502    MachineOperand &PMO = MI->getOperand(PIdx);
503    PMO.setReg(Pred[2].getReg());
504    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
505    return true;
506  }
507
508  return false;
509}
510
511int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
512                                   const MachineInstr *MI,
513                                   unsigned *PredCost) const
514{
515  if (PredCost)
516    *PredCost = 2;
517  return 2;
518}
519