R600InstrInfo.cpp revision 90bd1d52bbf95947955a66ec67f5f6c7dc87119a
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600InstrInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPUSubtarget.h"
17#include "R600Defines.h"
18#include "R600RegisterInfo.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "AMDILUtilityFunctions.h"
21
22#define GET_INSTRINFO_CTOR
23#include "AMDGPUGenDFAPacketizer.inc"
24
25using namespace llvm;
26
27R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
28  : AMDGPUInstrInfo(tm),
29    RI(tm, *this),
30    TM(tm)
31  { }
32
33const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
34{
35  return RI;
36}
37
38bool R600InstrInfo::isTrig(const MachineInstr &MI) const
39{
40  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
41}
42
43bool R600InstrInfo::isVector(const MachineInstr &MI) const
44{
45  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46}
47
48void
49R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50                           MachineBasicBlock::iterator MI, DebugLoc DL,
51                           unsigned DestReg, unsigned SrcReg,
52                           bool KillSrc) const
53{
54  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
55      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
56    for (unsigned i = 0; i < 4; i++) {
57      unsigned SubRegIndex = RI.getSubRegFromChannel(i);
58      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
59              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
60              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
61              .addImm(0) // Flag
62              .addReg(0) // PREDICATE_BIT
63              .addReg(DestReg, RegState::Define | RegState::Implicit);
64    }
65  } else {
66
67    /* We can't copy vec4 registers */
68    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
69           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
70
71    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
72      .addReg(SrcReg, getKillRegState(KillSrc))
73      .addImm(0) // Flag
74      .addReg(0); // PREDICATE_BIT
75  }
76}
77
78MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
79                                             unsigned DstReg, int64_t Imm) const
80{
81  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
82  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
83  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
84  MachineInstrBuilder(MI).addImm(Imm);
85  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
86
87  return MI;
88}
89
90unsigned R600InstrInfo::getIEQOpcode() const
91{
92  return AMDGPU::SETE_INT;
93}
94
95bool R600InstrInfo::isMov(unsigned Opcode) const
96{
97
98
99  switch(Opcode) {
100  default: return false;
101  case AMDGPU::MOV:
102  case AMDGPU::MOV_IMM_F32:
103  case AMDGPU::MOV_IMM_I32:
104    return true;
105  }
106}
107
108// Some instructions act as place holders to emulate operations that the GPU
109// hardware does automatically. This function can be used to check if
110// an opcode falls into this category.
111bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const
112{
113  switch (opcode) {
114  default: return false;
115  case AMDGPU::RETURN:
116  case AMDGPU::LAST:
117  case AMDGPU::MASK_WRITE:
118  case AMDGPU::RESERVE_REG:
119    return true;
120  }
121}
122
123bool R600InstrInfo::isReductionOp(unsigned opcode) const
124{
125  switch(opcode) {
126    default: return false;
127    case AMDGPU::DOT4_r600:
128    case AMDGPU::DOT4_eg:
129      return true;
130  }
131}
132
133bool R600InstrInfo::isCubeOp(unsigned opcode) const
134{
135  switch(opcode) {
136    default: return false;
137    case AMDGPU::CUBE_r600_pseudo:
138    case AMDGPU::CUBE_r600_real:
139    case AMDGPU::CUBE_eg_pseudo:
140    case AMDGPU::CUBE_eg_real:
141      return true;
142  }
143}
144
145DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
146    const ScheduleDAG *DAG) const
147{
148  const InstrItineraryData *II = TM->getInstrItineraryData();
149  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
150}
151
152static bool
153isPredicateSetter(unsigned opcode)
154{
155  switch (opcode) {
156  case AMDGPU::PRED_X:
157    return true;
158  default:
159    return false;
160  }
161}
162
163static MachineInstr *
164findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
165                             MachineBasicBlock::iterator I)
166{
167  while (I != MBB.begin()) {
168    --I;
169    MachineInstr *MI = I;
170    if (isPredicateSetter(MI->getOpcode()))
171      return MI;
172  }
173
174  return NULL;
175}
176
// Analyze the terminators of MBB.  Returns false on success — with TBB/FBB
// and Cond describing the branch structure — and true when the terminators
// cannot be understood.  On success Cond holds three operands taken from
// the PRED_X setter: {operand 1, operand 2, PRED_SEL_ONE}.
// NOTE(review): the meaning of PRED_X operands 1 and 2 is assumed from this
// use — confirm against the PRED_X instruction definition.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; an all-debug block is a fall-through too.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // Only JUMP terminators are modelled here; anything else is reported as
  // "no branch" (fall-through).
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  // NOTE: the second clause decrements I as a side effect, so past this
  // condition I may point at the instruction *before* LastInst.
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Unconditional branch: only the target block is reported.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Conditional branch: scan backwards for the PRED_X feeding it.
        // NOTE(review): this scan has no guard against walking past
        // MBB.begin() — it relies on a predicate setter being present.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  // (Predicated JUMP to TBB followed by unconditional JUMP to FBB.)
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    // Same unguarded backward scan for the predicate setter as above.
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
250
251int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
252  const MachineInstr *MI = op.getParent();
253
254  switch (MI->getDesc().OpInfo->RegClass) {
255  default: // FIXME: fallthrough??
256  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
257  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
258  };
259}
260
261unsigned
262R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
263                            MachineBasicBlock *TBB,
264                            MachineBasicBlock *FBB,
265                            const SmallVectorImpl<MachineOperand> &Cond,
266                            DebugLoc DL) const
267{
268  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
269
270  if (FBB == 0) {
271    if (Cond.empty()) {
272      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
273      return 1;
274    } else {
275      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
276      assert(PredSet && "No previous predicate !");
277      AddFlag(PredSet, 1, MO_FLAG_PUSH);
278      PredSet->getOperand(2).setImm(Cond[1].getImm());
279
280      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
281             .addMBB(TBB)
282             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
283      return 1;
284    }
285  } else {
286    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
287    assert(PredSet && "No previous predicate !");
288    AddFlag(PredSet, 1, MO_FLAG_PUSH);
289    PredSet->getOperand(2).setImm(Cond[1].getImm());
290    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
291            .addMBB(TBB)
292            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
293    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
294    return 2;
295  }
296}
297
298unsigned
299R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
300{
301
302  // Note : we leave PRED* instructions there.
303  // They may be needed when predicating instructions.
304
305  MachineBasicBlock::iterator I = MBB.end();
306
307  if (I == MBB.begin()) {
308    return 0;
309  }
310  --I;
311  switch (I->getOpcode()) {
312  default:
313    return 0;
314  case AMDGPU::JUMP:
315    if (isPredicated(I)) {
316      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
317      ClearFlag(predSet, 1, MO_FLAG_PUSH);
318    }
319    I->eraseFromParent();
320    break;
321  }
322  I = MBB.end();
323
324  if (I == MBB.begin()) {
325    return 1;
326  }
327  --I;
328  switch (I->getOpcode()) {
329    // FIXME: only one case??
330  default:
331    return 1;
332  case AMDGPU::JUMP:
333    if (isPredicated(I)) {
334      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
335      ClearFlag(predSet, 1, MO_FLAG_PUSH);
336    }
337    I->eraseFromParent();
338    break;
339  }
340  return 2;
341}
342
343bool
344R600InstrInfo::isPredicated(const MachineInstr *MI) const
345{
346  int idx = MI->findFirstPredOperandIdx();
347  if (idx < 0)
348    return false;
349
350  unsigned Reg = MI->getOperand(idx).getReg();
351  switch (Reg) {
352  default: return false;
353  case AMDGPU::PRED_SEL_ONE:
354  case AMDGPU::PRED_SEL_ZERO:
355  case AMDGPU::PREDICATE_BIT:
356    return true;
357  }
358}
359
360bool
361R600InstrInfo::isPredicable(MachineInstr *MI) const
362{
363  return AMDGPUInstrInfo::isPredicable(MI);
364}
365
366
367bool
368R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
369                                   unsigned NumCyles,
370                                   unsigned ExtraPredCycles,
371                                   const BranchProbability &Probability) const{
372  return true;
373}
374
375bool
376R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
377                                   unsigned NumTCycles,
378                                   unsigned ExtraTCycles,
379                                   MachineBasicBlock &FMBB,
380                                   unsigned NumFCycles,
381                                   unsigned ExtraFCycles,
382                                   const BranchProbability &Probability) const
383{
384  return true;
385}
386
387bool
388R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
389                                         unsigned NumCyles,
390                                         const BranchProbability &Probability)
391                                         const
392{
393  return true;
394}
395
396bool
397R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
398                                         MachineBasicBlock &FMBB) const
399{
400  return false;
401}
402
403
404bool
405R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
406{
407  MachineOperand &MO = Cond[1];
408  switch (MO.getImm()) {
409  case OPCODE_IS_ZERO_INT:
410    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
411    break;
412  case OPCODE_IS_NOT_ZERO_INT:
413    MO.setImm(OPCODE_IS_ZERO_INT);
414    break;
415  case OPCODE_IS_ZERO:
416    MO.setImm(OPCODE_IS_NOT_ZERO);
417    break;
418  case OPCODE_IS_NOT_ZERO:
419    MO.setImm(OPCODE_IS_ZERO);
420    break;
421  default:
422    return true;
423  }
424
425  MachineOperand &MO2 = Cond[2];
426  switch (MO2.getReg()) {
427  case AMDGPU::PRED_SEL_ZERO:
428    MO2.setReg(AMDGPU::PRED_SEL_ONE);
429    break;
430  case AMDGPU::PRED_SEL_ONE:
431    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
432    break;
433  default:
434    return true;
435  }
436  return false;
437}
438
439bool
440R600InstrInfo::DefinesPredicate(MachineInstr *MI,
441                                std::vector<MachineOperand> &Pred) const
442{
443  return isPredicateSetter(MI->getOpcode());
444}
445
446
447bool
448R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
449                       const SmallVectorImpl<MachineOperand> &Pred2) const
450{
451  return false;
452}
453
454
455bool
456R600InstrInfo::PredicateInstruction(MachineInstr *MI,
457                      const SmallVectorImpl<MachineOperand> &Pred) const
458{
459  int PIdx = MI->findFirstPredOperandIdx();
460
461  if (PIdx != -1) {
462    MachineOperand &PMO = MI->getOperand(PIdx);
463    PMO.setReg(Pred[2].getReg());
464    MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
465    return true;
466  }
467
468  return false;
469}
470
471int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
472                                   const MachineInstr *MI,
473                                   unsigned *PredCost) const
474{
475  if (PredCost)
476    *PredCost = 2;
477  return 2;
478}
479
480//===----------------------------------------------------------------------===//
481// Instruction flag getters/setters
482//===----------------------------------------------------------------------===//
483
484bool R600InstrInfo::HasFlagOperand(const MachineInstr &MI) const
485{
486  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
487}
488
489MachineOperand &R600InstrInfo::GetFlagOp(MachineInstr *MI) const
490{
491  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
492  assert(FlagIndex != 0 &&
493         "Instruction flags not supported for this instruction");
494  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
495  assert(FlagOp.isImm());
496  return FlagOp;
497}
498
499void R600InstrInfo::AddFlag(MachineInstr *MI, unsigned Operand,
500                            unsigned Flag) const
501{
502  MachineOperand &FlagOp = GetFlagOp(MI);
503  FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
504}
505
506void R600InstrInfo::ClearFlag(MachineInstr *MI, unsigned Operand,
507                              unsigned Flag) const
508{
509  MachineOperand &FlagOp = GetFlagOp(MI);
510  unsigned InstFlags = FlagOp.getImm();
511  InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
512  FlagOp.setImm(InstFlags);
513}
514