R600InstrInfo.cpp revision a614979286f8d329af318c1e9fb067e17cab4315
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600InstrInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPUSubtarget.h"
17#include "R600RegisterInfo.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "AMDILUtilityFunctions.h"
20
21#define GET_INSTRINFO_CTOR
22#include "AMDGPUGenDFAPacketizer.inc"
23
24using namespace llvm;
25
26R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
27  : AMDGPUInstrInfo(tm),
28    RI(tm, *this),
29    TM(tm)
30  { }
31
32const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
33{
34  return RI;
35}
36
37bool R600InstrInfo::isTrig(const MachineInstr &MI) const
38{
39  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
40}
41
42bool R600InstrInfo::isVector(const MachineInstr &MI) const
43{
44  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
45}
46
47void
48R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
49                           MachineBasicBlock::iterator MI, DebugLoc DL,
50                           unsigned DestReg, unsigned SrcReg,
51                           bool KillSrc) const
52{
53
54  unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
55                           AMDGPU::sel_z, AMDGPU::sel_w};
56
57  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
58      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
59    for (unsigned i = 0; i < 4; i++) {
60      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
61              .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
62              .addReg(RI.getSubReg(SrcReg, subRegMap[i]))
63              .addReg(0) // PREDICATE_BIT
64              .addReg(DestReg, RegState::Define | RegState::Implicit);
65    }
66  } else {
67
68    /* We can't copy vec4 registers */
69    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
70           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
71
72    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
73      .addReg(SrcReg, getKillRegState(KillSrc))
74      .addReg(0); // PREDICATE_BIT
75  }
76}
77
78MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
79                                             unsigned DstReg, int64_t Imm) const
80{
81  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
82  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
83  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
84  MachineInstrBuilder(MI).addImm(Imm);
85  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
86
87  return MI;
88}
89
90unsigned R600InstrInfo::getIEQOpcode() const
91{
92  return AMDGPU::SETE_INT;
93}
94
95bool R600InstrInfo::isMov(unsigned Opcode) const
96{
97
98
99  switch(Opcode) {
100  default: return false;
101  case AMDGPU::MOV:
102  case AMDGPU::MOV_IMM_F32:
103  case AMDGPU::MOV_IMM_I32:
104    return true;
105  }
106}
107
108// Some instructions act as place holders to emulate operations that the GPU
109// hardware does automatically. This function can be used to check if
110// an opcode falls into this category.
111bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const
112{
113  switch (opcode) {
114  default: return false;
115  case AMDGPU::RETURN:
116  case AMDGPU::LAST:
117  case AMDGPU::MASK_WRITE:
118  case AMDGPU::RESERVE_REG:
119    return true;
120  }
121}
122
123bool R600InstrInfo::isTexOp(unsigned opcode) const
124{
125  switch(opcode) {
126  default: return false;
127  case AMDGPU::TEX_LD:
128  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
129  case AMDGPU::TEX_SAMPLE:
130  case AMDGPU::TEX_SAMPLE_C:
131  case AMDGPU::TEX_SAMPLE_L:
132  case AMDGPU::TEX_SAMPLE_C_L:
133  case AMDGPU::TEX_SAMPLE_LB:
134  case AMDGPU::TEX_SAMPLE_C_LB:
135  case AMDGPU::TEX_SAMPLE_G:
136  case AMDGPU::TEX_SAMPLE_C_G:
137  case AMDGPU::TEX_GET_GRADIENTS_H:
138  case AMDGPU::TEX_GET_GRADIENTS_V:
139  case AMDGPU::TEX_SET_GRADIENTS_H:
140  case AMDGPU::TEX_SET_GRADIENTS_V:
141    return true;
142  }
143}
144
145bool R600InstrInfo::isReductionOp(unsigned opcode) const
146{
147  switch(opcode) {
148    default: return false;
149    case AMDGPU::DOT4_r600:
150    case AMDGPU::DOT4_eg:
151      return true;
152  }
153}
154
155bool R600InstrInfo::isCubeOp(unsigned opcode) const
156{
157  switch(opcode) {
158    default: return false;
159    case AMDGPU::CUBE_r600:
160    case AMDGPU::CUBE_eg:
161      return true;
162  }
163}
164
165
166bool R600InstrInfo::isFCOp(unsigned opcode) const
167{
168  switch(opcode) {
169  default: return false;
170  case AMDGPU::BREAK_LOGICALZ_f32:
171  case AMDGPU::BREAK_LOGICALNZ_i32:
172  case AMDGPU::BREAK_LOGICALZ_i32:
173  case AMDGPU::BREAK_LOGICALNZ_f32:
174  case AMDGPU::CONTINUE_LOGICALNZ_f32:
175  case AMDGPU::IF_LOGICALNZ_i32:
176  case AMDGPU::IF_LOGICALZ_f32:
177  case AMDGPU::ELSE:
178  case AMDGPU::ENDIF:
179  case AMDGPU::ENDLOOP:
180  case AMDGPU::IF_LOGICALNZ_f32:
181  case AMDGPU::WHILELOOP:
182    return true;
183  }
184}
185
186DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
187    const ScheduleDAG *DAG) const
188{
189  const InstrItineraryData *II = TM->getInstrItineraryData();
190  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
191}
192
193static bool
194isPredicateSetter(unsigned opcode)
195{
196  switch (opcode) {
197  case AMDGPU::PRED_X:
198    return true;
199  default:
200    return false;
201  }
202}
203
204static MachineInstr *
205findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
206                             MachineBasicBlock::iterator I)
207{
208  while (I != MBB.begin()) {
209    --I;
210    MachineInstr *MI = I;
211    if (isPredicateSetter(MI->getOpcode()))
212      return MI;
213  }
214
215  return NULL;
216}
217
// Analyze the terminators of MBB and classify its branching structure.
// Returns false on success (TBB/FBB/Cond filled in as appropriate) and true
// when the block's terminators cannot be understood.  On this target the
// only branch opcode is JUMP; a JUMP is conditional iff it is predicated,
// and the condition is recovered from the preceding predicate setter
// (PRED_X) as {compare opcode, PRED_SEL register kind}.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip over trailing debug values; they are not real terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // Anything other than JUMP at the end means no branch to analyze
  // (pure fallthrough).
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Single unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Single conditional branch: walk back to the PRED_X that sets the
        // predicate and export its condition operands.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  // (i.e. a predicated JUMP followed by an unconditional JUMP.)
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
291
// Map a condition operand to the branch opcode matching its register class
// (integer vs. float compare-and-branch).
// NOTE(review): the default case deliberately(?) falls through to the i32
// branch -- the original FIXME suggests unhandled register classes were not
// expected here; confirm before relying on this for other classes.
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}
301
// Insert branch code at the end of MBB and return the number of
// instructions inserted.  For a conditional branch, the condition is not
// carried by the JUMP itself: the preceding PRED_X predicate setter is
// re-armed by setting bit 4 in its operand-1 target flags (the
// predicate-enable flag that RemoveBranch clears) and copying the compare
// opcode from Cond[1] into its operand 2; the JUMP then consumes
// PREDICATE_BIT.
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch: JUMP with an unset predicate operand.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Conditional branch with fallthrough: re-arm the predicate setter,
      // then emit a predicated JUMP.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      PredSet->getOperand(1).addTargetFlag(1<<4); // predicate-enable bit
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Conditional branch to TBB plus unconditional branch to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4); // predicate-enable bit
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
338
// Remove up to two trailing JUMPs from MBB and return how many were erased.
// For a predicated JUMP, the matching PRED_X is kept but disarmed by
// clearing the predicate-enable bit (1<<4) in its operand-1 target flags --
// the inverse of what InsertBranch sets.
unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
{

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  // First pass: the last instruction.
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      // Disarm the predicate setter feeding this conditional branch.
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  // Second pass: the (new) last instruction, in case the block ended with a
  // conditional + unconditional branch pair.
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP:
    if (isPredicated(I)) {
      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
      predSet->getOperand(1).setTargetFlags(flag);
    }
    I->eraseFromParent();
    break;
  }
  return 2;
}
385
386bool
387R600InstrInfo::isPredicated(const MachineInstr *MI) const
388{
389  int idx = MI->findFirstPredOperandIdx();
390  if (idx < 0)
391    return false;
392
393  unsigned Reg = MI->getOperand(idx).getReg();
394  switch (Reg) {
395  default: return false;
396  case AMDGPU::PRED_SEL_ONE:
397  case AMDGPU::PRED_SEL_ZERO:
398  case AMDGPU::PREDICATE_BIT:
399    return true;
400  }
401}
402
// No target-specific predication restrictions here; defer entirely to the
// generic AMDGPU implementation.
bool
R600InstrInfo::isPredicable(MachineInstr *MI) const
{
  return AMDGPUInstrInfo::isPredicable(MI);
}
408
409
410bool
411R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
412                                   unsigned NumCyles,
413                                   unsigned ExtraPredCycles,
414                                   const BranchProbability &Probability) const{
415  return true;
416}
417
418bool
419R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
420                                   unsigned NumTCycles,
421                                   unsigned ExtraTCycles,
422                                   MachineBasicBlock &FMBB,
423                                   unsigned NumFCycles,
424                                   unsigned ExtraFCycles,
425                                   const BranchProbability &Probability) const
426{
427  return true;
428}
429
430bool
431R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
432                                         unsigned NumCyles,
433                                         const BranchProbability &Probability)
434                                         const
435{
436  return true;
437}
438
439bool
440R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
441                                         MachineBasicBlock &FMBB) const
442{
443  return false;
444}
445
446
447bool
448R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
449{
450  MachineOperand &MO = Cond[1];
451  switch (MO.getImm()) {
452  case OPCODE_IS_ZERO_INT:
453    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
454    break;
455  case OPCODE_IS_NOT_ZERO_INT:
456    MO.setImm(OPCODE_IS_ZERO_INT);
457    break;
458  case OPCODE_IS_ZERO:
459    MO.setImm(OPCODE_IS_NOT_ZERO);
460    break;
461  case OPCODE_IS_NOT_ZERO:
462    MO.setImm(OPCODE_IS_ZERO);
463    break;
464  default:
465    return true;
466  }
467
468  MachineOperand &MO2 = Cond[2];
469  switch (MO2.getReg()) {
470  case AMDGPU::PRED_SEL_ZERO:
471    MO2.setReg(AMDGPU::PRED_SEL_ONE);
472    break;
473  case AMDGPU::PRED_SEL_ONE:
474    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
475    break;
476  default:
477    return true;
478  }
479  return false;
480}
481
482bool
483R600InstrInfo::DefinesPredicate(MachineInstr *MI,
484                                std::vector<MachineOperand> &Pred) const
485{
486  return isPredicateSetter(MI->getOpcode());
487}
488
489
490bool
491R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
492                       const SmallVectorImpl<MachineOperand> &Pred2) const
493{
494  return false;
495}
496
497
498bool
499R600InstrInfo::PredicateInstruction(MachineInstr *MI,
500                      const SmallVectorImpl<MachineOperand> &Pred) const
501{
502  int PIdx = MI->findFirstPredOperandIdx();
503
504  if (PIdx != -1) {
505    MachineOperand &PMO = MI->getOperand(PIdx);
506    PMO.setReg(Pred[2].getReg());
507    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
508    return true;
509  }
510
511  return false;
512}
513
514int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
515                                   const MachineInstr *MI,
516                                   unsigned *PredCost) const
517{
518  if (PredCost)
519    *PredCost = 2;
520  return 2;
521}
522