// R600InstrInfo.cpp revision 0eca5fd919b0a31ea926b5f5072e5e56f7a55269
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// R600 Implementation of TargetInstrInfo.
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600InstrInfo.h"
15#include "AMDGPUTargetMachine.h"
16#include "AMDGPUSubtarget.h"
17#include "R600RegisterInfo.h"
18#include "llvm/CodeGen/MachineInstrBuilder.h"
19#include "AMDILUtilityFunctions.h"
20#include "AMDGPUUtil.h"
21
22#define GET_INSTRINFO_CTOR
23#include "AMDGPUGenDFAPacketizer.inc"
24
25using namespace llvm;
26
27R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
28  : AMDGPUInstrInfo(tm),
29    RI(tm, *this),
30    TM(tm)
31  { }
32
33const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
34{
35  return RI;
36}
37
38bool R600InstrInfo::isTrig(const MachineInstr &MI) const
39{
40  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
41}
42
43bool R600InstrInfo::isVector(const MachineInstr &MI) const
44{
45  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46}
47
48void
49R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50                           MachineBasicBlock::iterator MI, DebugLoc DL,
51                           unsigned DestReg, unsigned SrcReg,
52                           bool KillSrc) const
53{
54
55  unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y,
56                           AMDGPU::sel_z, AMDGPU::sel_w};
57
58  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
59      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
60    for (unsigned i = 0; i < 4; i++) {
61      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
62              .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define)
63              .addReg(RI.getSubReg(SrcReg, subRegMap[i]))
64              .addReg(0) // PREDICATE_BIT
65              .addReg(DestReg, RegState::Define | RegState::Implicit);
66    }
67  } else {
68
69    /* We can't copy vec4 registers */
70    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
71           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
72
73    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
74      .addReg(SrcReg, getKillRegState(KillSrc))
75      .addReg(0); // PREDICATE_BIT
76  }
77}
78
79MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
80                                             unsigned DstReg, int64_t Imm) const
81{
82  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
83  MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
84  MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
85  MachineInstrBuilder(MI).addImm(Imm);
86  MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
87
88  return MI;
89}
90
91unsigned R600InstrInfo::getIEQOpcode() const
92{
93  return AMDGPU::SETE_INT;
94}
95
96bool R600InstrInfo::isMov(unsigned Opcode) const
97{
98
99
100  switch(Opcode) {
101  default: return false;
102  case AMDGPU::MOV:
103  case AMDGPU::MOV_IMM_F32:
104  case AMDGPU::MOV_IMM_I32:
105    return true;
106  }
107}
108
109// Some instructions act as place holders to emulate operations that the GPU
110// hardware does automatically. This function can be used to check if
111// an opcode falls into this category.
112bool R600InstrInfo::isPlaceHolderOpcode(unsigned opcode) const
113{
114  switch (opcode) {
115  default: return false;
116  case AMDGPU::RETURN:
117  case AMDGPU::LAST:
118  case AMDGPU::MASK_WRITE:
119  case AMDGPU::RESERVE_REG:
120    return true;
121  }
122}
123
124bool R600InstrInfo::isTexOp(unsigned opcode) const
125{
126  switch(opcode) {
127  default: return false;
128  case AMDGPU::TEX_LD:
129  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
130  case AMDGPU::TEX_SAMPLE:
131  case AMDGPU::TEX_SAMPLE_C:
132  case AMDGPU::TEX_SAMPLE_L:
133  case AMDGPU::TEX_SAMPLE_C_L:
134  case AMDGPU::TEX_SAMPLE_LB:
135  case AMDGPU::TEX_SAMPLE_C_LB:
136  case AMDGPU::TEX_SAMPLE_G:
137  case AMDGPU::TEX_SAMPLE_C_G:
138  case AMDGPU::TEX_GET_GRADIENTS_H:
139  case AMDGPU::TEX_GET_GRADIENTS_V:
140  case AMDGPU::TEX_SET_GRADIENTS_H:
141  case AMDGPU::TEX_SET_GRADIENTS_V:
142    return true;
143  }
144}
145
146bool R600InstrInfo::isReductionOp(unsigned opcode) const
147{
148  switch(opcode) {
149    default: return false;
150    case AMDGPU::DOT4_r600:
151    case AMDGPU::DOT4_eg:
152      return true;
153  }
154}
155
156bool R600InstrInfo::isCubeOp(unsigned opcode) const
157{
158  switch(opcode) {
159    default: return false;
160    case AMDGPU::CUBE_r600:
161    case AMDGPU::CUBE_eg:
162      return true;
163  }
164}
165
166
167bool R600InstrInfo::isFCOp(unsigned opcode) const
168{
169  switch(opcode) {
170  default: return false;
171  case AMDGPU::BREAK_LOGICALZ_f32:
172  case AMDGPU::BREAK_LOGICALNZ_i32:
173  case AMDGPU::BREAK_LOGICALZ_i32:
174  case AMDGPU::BREAK_LOGICALNZ_f32:
175  case AMDGPU::CONTINUE_LOGICALNZ_f32:
176  case AMDGPU::IF_LOGICALNZ_i32:
177  case AMDGPU::IF_LOGICALZ_f32:
178  case AMDGPU::ELSE:
179  case AMDGPU::ENDIF:
180  case AMDGPU::ENDLOOP:
181  case AMDGPU::IF_LOGICALNZ_f32:
182  case AMDGPU::WHILELOOP:
183    return true;
184  }
185}
186
187DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
188    const ScheduleDAG *DAG) const
189{
190  const InstrItineraryData *II = TM->getInstrItineraryData();
191  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
192}
193
194static bool
195isPredicateSetter(unsigned opcode)
196{
197  switch (opcode) {
198  case AMDGPU::PRED_X:
199    return true;
200  default:
201    return false;
202  }
203}
204
205static MachineInstr *
206findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
207                             MachineBasicBlock::iterator I)
208{
209  while (I != MBB.begin()) {
210    --I;
211    MachineInstr *MI = I;
212    if (isPredicateSetter(MI->getOpcode()))
213      return MI;
214  }
215
216  return NULL;
217}
218
// Analyze the branching structure at the end of MBB.
//
// Returns false on success (the block's terminators were understood), in
// which case TBB/FBB/Cond are filled in per the TargetInstrInfo contract:
//  - unconditional JUMP: TBB set, Cond left empty;
//  - single predicated JUMP: TBB set, Cond holds the feeding PRED_X's
//    operands 1 and 2 plus a PRED_SEL_ONE register operand;
//  - predicated JUMP followed by unconditional JUMP: TBB/FBB set and Cond
//    filled as above.
// Returns true when the terminators cannot be handled.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; they are not terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // Anything other than JUMP at the end means a fallthrough block.
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Unpredicated JUMP: unconditional branch, no condition operands.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Predicated JUMP: scan backwards for the PRED_X that feeds it.
        // NOTE(review): assumes a predicate setter exists before the JUMP;
        // the loop would walk past MBB.begin() otherwise — confirm callers
        // guarantee this.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  // (conditional JUMP to TBB followed by unconditional JUMP to FBB)
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
292
293int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
294  const MachineInstr *MI = op.getParent();
295
296  switch (MI->getDesc().OpInfo->RegClass) {
297  default: // FIXME: fallthrough??
298  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
299  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
300  };
301}
302
// Insert branch instruction(s) at the end of MBB, per the TargetInstrInfo
// contract: TBB is the taken target, FBB (may be null) the fall-through
// target, and Cond the condition produced earlier by AnalyzeBranch
// (Cond[1] carries the PRED_X's second operand / immediate).
// Returns the number of instructions inserted (1 or 2).
unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const
{
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      // Unconditional branch: JUMP with a null (unset) predicate operand.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
      return 1;
    } else {
      // Conditional branch: reuse the most recent PRED_X in this block.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      // Bit 4 of the PRED_X operand's target flags marks it as feeding a
      // branch; RemoveBranch clears this same bit.
      PredSet->getOperand(1).addTargetFlag(1<<4);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      return 1;
    }
  } else {
    // Conditional branch to TBB plus an unconditional branch to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    PredSet->getOperand(1).addTargetFlag(1<<4);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
    return 2;
  }
}
339
340unsigned
341R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
342{
343
344  // Note : we leave PRED* instructions there.
345  // They may be needed when predicating instructions.
346
347  MachineBasicBlock::iterator I = MBB.end();
348
349  if (I == MBB.begin()) {
350    return 0;
351  }
352  --I;
353  switch (I->getOpcode()) {
354  default:
355    return 0;
356  case AMDGPU::JUMP:
357    if (isPredicated(I)) {
358      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
359      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
360      predSet->getOperand(1).setTargetFlags(flag);
361    }
362    I->eraseFromParent();
363    break;
364  }
365  I = MBB.end();
366
367  if (I == MBB.begin()) {
368    return 1;
369  }
370  --I;
371  switch (I->getOpcode()) {
372    // FIXME: only one case??
373  default:
374    return 1;
375  case AMDGPU::JUMP:
376    if (isPredicated(I)) {
377      MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
378      char flag = predSet->getOperand(1).getTargetFlags() & (~(1<<4));
379      predSet->getOperand(1).setTargetFlags(flag);
380    }
381    I->eraseFromParent();
382    break;
383  }
384  return 2;
385}
386
387bool
388R600InstrInfo::isPredicated(const MachineInstr *MI) const
389{
390  int idx = MI->findFirstPredOperandIdx();
391  if (idx < 0)
392    return false;
393
394  MI->dump();
395  unsigned Reg = MI->getOperand(idx).getReg();
396  switch (Reg) {
397  default: return false;
398  case AMDGPU::PRED_SEL_ONE:
399  case AMDGPU::PRED_SEL_ZERO:
400  case AMDGPU::PREDICATE_BIT:
401    return true;
402  }
403}
404
405bool
406R600InstrInfo::isPredicable(MachineInstr *MI) const
407{
408  return AMDGPUInstrInfo::isPredicable(MI);
409}
410