R600InstrInfo.cpp revision 7e9381951eb4dadf9c59257786416ac51a6a6c09
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600InstrInfo.h"
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "AMDGPUTargetMachine.h"
19#include "R600Defines.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25
26#define GET_INSTRINFO_CTOR
27#include "AMDGPUGenDFAPacketizer.inc"
28
29using namespace llvm;
30
31R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
32  : AMDGPUInstrInfo(tm),
33    RI(tm),
34    ST(tm.getSubtarget<AMDGPUSubtarget>())
35  { }
36
37const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
38  return RI;
39}
40
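// TSFlags-based predicates: the TRIG and VECTOR bits come from the
// instruction descriptions in the .td files.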
41bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
42  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
43}
44
45bool R600InstrInfo::isVector(const MachineInstr &MI) const {
46  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
47}
48
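// Copy between two physical registers. 128-bit registers are copied channel
// by channel with one MOV per sub-register; everything else is a single MOV
// with the kill flag propagated to the source operand.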
49void
50R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
51                           MachineBasicBlock::iterator MI, DebugLoc DL,
52                           unsigned DestReg, unsigned SrcReg,
53                           bool KillSrc) const {
54  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
55      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
56    for (unsigned I = 0; I < 4; I++) {
57      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
58      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
59                              RI.getSubReg(DestReg, SubRegIndex),
60                              RI.getSubReg(SrcReg, SubRegIndex))
61                              .addReg(DestReg,
62                                      RegState::Define | RegState::Implicit);
63    }
64  } else {
65
66    // We can't copy vec4 registers
67    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
68           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
69
70    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
71                                                  DestReg, SrcReg);
72    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
73                                    .setIsKill(KillSrc);
74  }
75}
76
77MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
78                                             unsigned DstReg, int64_t Imm) const {
79  MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
80  MachineInstrBuilder MIB(*MF, MI);
81  MIB.addReg(DstReg, RegState::Define);
82  MIB.addReg(AMDGPU::ALU_LITERAL_X);
83  MIB.addImm(Imm);
84  MIB.addReg(0); // PREDICATE_BIT
85
86  return MI;
87}
88
89unsigned R600InstrInfo::getIEQOpcode() const {
90  return AMDGPU::SETE_INT;
91}
92
bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}
104
// Some instructions act as placeholders to emulate operations that the GPU
// hardware performs automatically. This function can be used to check whether
// an opcode falls into this category.
108bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
109  switch (Opcode) {
110  default: return false;
111  case AMDGPU::RETURN:
112    return true;
113  }
114}
115
116bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
117  switch(Opcode) {
118    default: return false;
119  }
120}
121
122bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
123  switch(Opcode) {
124    default: return false;
125    case AMDGPU::CUBE_r600_pseudo:
126    case AMDGPU::CUBE_r600_real:
127    case AMDGPU::CUBE_eg_pseudo:
128    case AMDGPU::CUBE_eg_real:
129      return true;
130  }
131}
132
133bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
134  unsigned TargetFlags = get(Opcode).TSFlags;
135
136  return (TargetFlags & R600_InstFlag::ALU_INST);
137}
138
139bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
140  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
141}
142
143bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
144  return isTransOnly(MI->getOpcode());
145}
146
147bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
148  return ST.hasVertexCache() && IS_VTX(get(Opcode));
149}
150
bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}
155
156bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
157  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
158}
159
bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}
165
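// Collect the source operands of an ALU instruction together with the value
// that identifies each of them: the kcache selector for ALU_CONST reads, the
// literal value for ALU_LITERAL_X reads, and 0 for plain register reads.
// For DOT_4 only the per-channel constant sources are recorded.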
166SmallVector<std::pair<MachineOperand *, int64_t>, 3>
167R600InstrInfo::getSrcs(MachineInstr *MI) const {
168  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
169
170  if (MI->getOpcode() == AMDGPU::DOT_4) {
171    static const unsigned OpTable[8][2] = {
172      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
173      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
174      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
175      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
176      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
177      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
178      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
179      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
180    };
181
182    for (unsigned j = 0; j < 8; j++) {
183      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
184                                                        OpTable[j][0]));
185      unsigned Reg = MO.getReg();
186      if (Reg == AMDGPU::ALU_CONST) {
187        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
188                                                    OpTable[j][1])).getImm();
189        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
190        continue;
      }
    }
194    return Result;
195  }
196
197  static const unsigned OpTable[3][2] = {
198    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
199    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
200    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
201  };
202
203  for (unsigned j = 0; j < 3; j++) {
204    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
205    if (SrcIdx < 0)
206      break;
207    MachineOperand &MO = MI->getOperand(SrcIdx);
208    unsigned Reg = MI->getOperand(SrcIdx).getReg();
209    if (Reg == AMDGPU::ALU_CONST) {
210      unsigned Sel = MI->getOperand(
211          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
212      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
213      continue;
214    }
215    if (Reg == AMDGPU::ALU_LITERAL_X) {
216      unsigned Imm = MI->getOperand(
217          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
218      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
219      continue;
220    }
221    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
222  }
223  return Result;
224}
225
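// Flatten the sources of MI into (register index, channel) pairs for the
// read-port checks below. Constants, out-of-range indices and values produced
// inside the current group (PV) become a (-1, 0) dummy entry, and the result
// is padded to three entries.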
226std::vector<std::pair<int, unsigned> >
227R600InstrInfo::ExtractSrcs(MachineInstr *MI,
228                           const DenseMap<unsigned, unsigned> &PV)
229    const {
230  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
231  const std::pair<int, unsigned> DummyPair(-1, 0);
232  std::vector<std::pair<int, unsigned> > Result;
233  unsigned i = 0;
234  for (unsigned n = Srcs.size(); i < n; ++i) {
235    unsigned Reg = Srcs[i].first->getReg();
236    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
237    unsigned Chan = RI.getHWRegChan(Reg);
238    if (Index > 127) {
239      Result.push_back(DummyPair);
240      continue;
241    }
242    if (PV.find(Reg) != PV.end()) {
243      Result.push_back(DummyPair);
244      continue;
245    }
246    Result.push_back(std::pair<int, unsigned>(Index, Chan));
247  }
248  for (; i < 3; ++i)
249    Result.push_back(DummyPair);
250  return Result;
251}
252
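// Permute the three sources of a slot according to the given bank-swizzle
// value (ALU_VEC_012 ... ALU_VEC_210).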
253static std::vector<std::pair<int, unsigned> >
254Swizzle(std::vector<std::pair<int, unsigned> > Src,
255        R600InstrInfo::BankSwizzle Swz) {
256  switch (Swz) {
257  case R600InstrInfo::ALU_VEC_012:
258    break;
259  case R600InstrInfo::ALU_VEC_021:
260    std::swap(Src[1], Src[2]);
261    break;
262  case R600InstrInfo::ALU_VEC_102:
263    std::swap(Src[0], Src[1]);
264    break;
265  case R600InstrInfo::ALU_VEC_120:
266    std::swap(Src[0], Src[1]);
267    std::swap(Src[0], Src[2]);
268    break;
269  case R600InstrInfo::ALU_VEC_201:
270    std::swap(Src[0], Src[2]);
271    std::swap(Src[0], Src[1]);
272    break;
273  case R600InstrInfo::ALU_VEC_210:
274    std::swap(Src[0], Src[2]);
275    break;
276  }
277  return Src;
278}
279
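// Check that the first CheckedSize instructions of the group, with their
// sources permuted by Swz, are compatible: each (channel, source-slot) read
// port may only fetch a single register index within the group.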
280static bool
281isLegal(const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
282    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
283    unsigned CheckedSize) {
284  int Vector[4][3];
285  memset(Vector, -1, sizeof(Vector));
286  for (unsigned i = 0; i < CheckedSize; i++) {
287    const std::vector<std::pair<int, unsigned> > &Srcs =
288        Swizzle(IGSrcs[i], Swz[i]);
289    for (unsigned j = 0; j < 3; j++) {
290      const std::pair<int, unsigned> &Src = Srcs[j];
291      if (Src.first < 0)
292        continue;
293      if (Vector[Src.second][j] < 0)
294        Vector[Src.second][j] = Src.first;
295      if (Vector[Src.second][j] != Src.first)
296        return false;
297    }
298  }
299  return true;
300}
301
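// Recursively search for a per-instruction bank-swizzle assignment that makes
// the whole group legal. SwzCandidate is updated in place and the search
// resumes from the swizzle already stored at the current depth.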
static bool recursiveFitsFPLimitation(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Depth = 0) {
306  if (!isLegal(IGSrcs, SwzCandidate, Depth))
307    return false;
308  if (IGSrcs.size() == Depth)
309    return true;
310  unsigned i = SwzCandidate[Depth];
311  for (; i < 6; i++) {
312    SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
313    if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
314      return true;
315  }
316  SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
317  return false;
318}
319
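// Returns true if the sources of the instruction group can be fetched within
// the GPR read-port limits. ValidSwizzle is seeded from the instructions'
// current bank_swizzle operands and filled with an assignment that works.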
320bool
321R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
322                                      const DenseMap<unsigned, unsigned> &PV,
323                                      std::vector<BankSwizzle> &ValidSwizzle)
324    const {
  // TODO: support shared src0 - src1 operand
326
327  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
328  ValidSwizzle.clear();
329  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
330    IGSrcs.push_back(ExtractSrcs(IG[i], PV));
331    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
332        AMDGPU::OpName::bank_swizzle);
333    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
334        IG[i]->getOperand(Op).getImm());
335  }
  return recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
340}
341
342
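// Conservative check on the constants read by an instruction group: at most
// two distinct constant-pair addresses are accepted across the whole group.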
343bool
344R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
345    const {
346  assert (Consts.size() <= 12 && "Too many operands in instructions group");
347  unsigned Pair1 = 0, Pair2 = 0;
348  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
349    unsigned ReadConstHalf = Consts[i] & 2;
350    unsigned ReadConstIndex = Consts[i] & (~3);
351    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
352    if (!Pair1) {
353      Pair1 = ReadHalfConst;
354      continue;
355    }
356    if (Pair1 == ReadHalfConst)
357      continue;
358    if (!Pair2) {
359      Pair2 = ReadHalfConst;
360      continue;
361    }
362    if (Pair2 != ReadHalfConst)
363      return false;
364  }
365  return true;
366}
367
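// Returns true if the ALU instructions in MIs can go into the same
// instruction group, i.e. their constant reads (ALU_CONST selectors and
// KC0/KC1 registers) fit the limitation above.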
368bool
369R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
370  std::vector<unsigned> Consts;
371  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
372    MachineInstr *MI = MIs[i];
373    if (!isALUInstr(MI->getOpcode()))
374      continue;
375
376    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Srcs =
377        getSrcs(MI);
378
379    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, int64_t> Src = Srcs[j];
381      if (Src.first->getReg() == AMDGPU::ALU_CONST)
382        Consts.push_back(Src.second);
383      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
384          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
385        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
386        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
387        Consts.push_back((Index << 2) | Chan);
388      }
389    }
390  }
391  return fitsConstReadLimitations(Consts);
392}
393
394DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
395    const ScheduleDAG *DAG) const {
396  const InstrItineraryData *II = TM->getInstrItineraryData();
397  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
398}
399
400static bool
401isPredicateSetter(unsigned Opcode) {
402  switch (Opcode) {
403  case AMDGPU::PRED_X:
404    return true;
405  default:
406    return false;
407  }
408}
409
410static MachineInstr *
411findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
412                             MachineBasicBlock::iterator I) {
413  while (I != MBB.begin()) {
414    --I;
415    MachineInstr *MI = I;
416    if (isPredicateSetter(MI->getOpcode()))
417      return MI;
418  }
419
420  return NULL;
421}
422
423static
424bool isJump(unsigned Opcode) {
425  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
426}
427
428bool
429R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
430                             MachineBasicBlock *&TBB,
431                             MachineBasicBlock *&FBB,
432                             SmallVectorImpl<MachineOperand> &Cond,
433                             bool AllowModify) const {
434  // Most of the following comes from the ARM implementation of AnalyzeBranch
435
436  // If the block has no terminators, it just falls into the block after it.
437  MachineBasicBlock::iterator I = MBB.end();
438  if (I == MBB.begin())
439    return false;
440  --I;
441  while (I->isDebugValue()) {
442    if (I == MBB.begin())
443      return false;
444    --I;
445  }
446  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
447    return false;
448  }
449
450  // Get the last instruction in the block.
451  MachineInstr *LastInst = I;
452
453  // If there is only one terminator instruction, process it.
454  unsigned LastOpc = LastInst->getOpcode();
455  if (I == MBB.begin() ||
456          !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
457    if (LastOpc == AMDGPU::JUMP) {
458      TBB = LastInst->getOperand(0).getMBB();
459      return false;
460    } else if (LastOpc == AMDGPU::JUMP_COND) {
461      MachineInstr *predSet = I;
462      while (!isPredicateSetter(predSet->getOpcode())) {
463        predSet = --I;
464      }
465      TBB = LastInst->getOperand(0).getMBB();
466      Cond.push_back(predSet->getOperand(1));
467      Cond.push_back(predSet->getOperand(2));
468      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
469      return false;
470    }
471    return true;  // Can't handle indirect branch.
472  }
473
474  // Get the instruction before it if it is a terminator.
475  MachineInstr *SecondLastInst = I;
476  unsigned SecondLastOpc = SecondLastInst->getOpcode();
477
478  // If the block ends with a B and a Bcc, handle it.
479  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
480    MachineInstr *predSet = --I;
481    while (!isPredicateSetter(predSet->getOpcode())) {
482      predSet = --I;
483    }
484    TBB = SecondLastInst->getOperand(0).getMBB();
485    FBB = LastInst->getOperand(0).getMBB();
486    Cond.push_back(predSet->getOperand(1));
487    Cond.push_back(predSet->getOperand(2));
488    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
489    return false;
490  }
491
492  // Otherwise, can't handle this.
493  return true;
494}
495
496int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
497  const MachineInstr *MI = op.getParent();
498
499  switch (MI->getDesc().OpInfo->RegClass) {
500  default: // FIXME: fallthrough??
501  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
502  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
503  };
504}
505
506unsigned
507R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
508                            MachineBasicBlock *TBB,
509                            MachineBasicBlock *FBB,
510                            const SmallVectorImpl<MachineOperand> &Cond,
511                            DebugLoc DL) const {
512  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
513
514  if (FBB == 0) {
515    if (Cond.empty()) {
516      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
517      return 1;
518    } else {
519      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
520      assert(PredSet && "No previous predicate !");
521      addFlag(PredSet, 0, MO_FLAG_PUSH);
522      PredSet->getOperand(2).setImm(Cond[1].getImm());
523
524      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
525             .addMBB(TBB)
526             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
527      return 1;
528    }
529  } else {
530    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
531    assert(PredSet && "No previous predicate !");
532    addFlag(PredSet, 0, MO_FLAG_PUSH);
533    PredSet->getOperand(2).setImm(Cond[1].getImm());
534    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
535            .addMBB(TBB)
536            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
537    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
538    return 2;
539  }
540}
541
542unsigned
543R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
544
  // Note: we leave PRED* instructions in place here.
  // They may be needed when predicating instructions.
547
548  MachineBasicBlock::iterator I = MBB.end();
549
550  if (I == MBB.begin()) {
551    return 0;
552  }
553  --I;
554  switch (I->getOpcode()) {
555  default:
556    return 0;
557  case AMDGPU::JUMP_COND: {
558    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
559    clearFlag(predSet, 0, MO_FLAG_PUSH);
560    I->eraseFromParent();
561    break;
562  }
563  case AMDGPU::JUMP:
564    I->eraseFromParent();
565    break;
566  }
567  I = MBB.end();
568
569  if (I == MBB.begin()) {
570    return 1;
571  }
572  --I;
573  switch (I->getOpcode()) {
574    // FIXME: only one case??
575  default:
576    return 1;
577  case AMDGPU::JUMP_COND: {
578    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
579    clearFlag(predSet, 0, MO_FLAG_PUSH);
580    I->eraseFromParent();
581    break;
582  }
583  case AMDGPU::JUMP:
584    I->eraseFromParent();
585    break;
586  }
587  return 2;
588}
589
590bool
591R600InstrInfo::isPredicated(const MachineInstr *MI) const {
592  int idx = MI->findFirstPredOperandIdx();
593  if (idx < 0)
594    return false;
595
596  unsigned Reg = MI->getOperand(idx).getReg();
597  switch (Reg) {
598  default: return false;
599  case AMDGPU::PRED_SEL_ONE:
600  case AMDGPU::PRED_SEL_ZERO:
601  case AMDGPU::PREDICATE_BIT:
602    return true;
603  }
604}
605
606bool
607R600InstrInfo::isPredicable(MachineInstr *MI) const {
608  // XXX: KILL* instructions can be predicated, but they must be the last
609  // instruction in a clause, so this means any instructions after them cannot
610  // be predicated.  Until we have proper support for instruction clauses in the
611  // backend, we will mark KILL* instructions as unpredicable.
612
613  if (MI->getOpcode() == AMDGPU::KILLGT) {
614    return false;
615  } else if (isVector(*MI)) {
616    return false;
617  } else {
618    return AMDGPUInstrInfo::isPredicable(MI);
619  }
620}
621
622
623bool
624R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
625                                   unsigned NumCyles,
626                                   unsigned ExtraPredCycles,
627                                   const BranchProbability &Probability) const{
628  return true;
629}
630
631bool
632R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
633                                   unsigned NumTCycles,
634                                   unsigned ExtraTCycles,
635                                   MachineBasicBlock &FMBB,
636                                   unsigned NumFCycles,
637                                   unsigned ExtraFCycles,
638                                   const BranchProbability &Probability) const {
639  return true;
640}
641
642bool
643R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
644                                         unsigned NumCyles,
645                                         const BranchProbability &Probability)
646                                         const {
647  return true;
648}
649
650bool
651R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
652                                         MachineBasicBlock &FMBB) const {
653  return false;
654}
655
656
657bool
658R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
659  MachineOperand &MO = Cond[1];
660  switch (MO.getImm()) {
661  case OPCODE_IS_ZERO_INT:
662    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
663    break;
664  case OPCODE_IS_NOT_ZERO_INT:
665    MO.setImm(OPCODE_IS_ZERO_INT);
666    break;
667  case OPCODE_IS_ZERO:
668    MO.setImm(OPCODE_IS_NOT_ZERO);
669    break;
670  case OPCODE_IS_NOT_ZERO:
671    MO.setImm(OPCODE_IS_ZERO);
672    break;
673  default:
674    return true;
675  }
676
677  MachineOperand &MO2 = Cond[2];
678  switch (MO2.getReg()) {
679  case AMDGPU::PRED_SEL_ZERO:
680    MO2.setReg(AMDGPU::PRED_SEL_ONE);
681    break;
682  case AMDGPU::PRED_SEL_ONE:
683    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
684    break;
685  default:
686    return true;
687  }
688  return false;
689}
690
691bool
692R600InstrInfo::DefinesPredicate(MachineInstr *MI,
693                                std::vector<MachineOperand> &Pred) const {
694  return isPredicateSetter(MI->getOpcode());
695}
696
697
698bool
699R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
700                       const SmallVectorImpl<MachineOperand> &Pred2) const {
701  return false;
702}
703
704
705bool
706R600InstrInfo::PredicateInstruction(MachineInstr *MI,
707                      const SmallVectorImpl<MachineOperand> &Pred) const {
708  int PIdx = MI->findFirstPredOperandIdx();
709
710  if (PIdx != -1) {
711    MachineOperand &PMO = MI->getOperand(PIdx);
712    PMO.setReg(Pred[2].getReg());
713    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
714    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
715    return true;
716  }
717
718  return false;
719}
720
721unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
722                                            const MachineInstr *MI,
723                                            unsigned *PredCost) const {
724  if (PredCost)
725    *PredCost = 2;
726  return 2;
727}
728
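// Lowest register index usable for indirect addressing: one past the highest
// live-in register, or 0 when there are no live-ins. Returns -1 when the
// function has no stack objects.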
729int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
730  const MachineRegisterInfo &MRI = MF.getRegInfo();
731  const MachineFrameInfo *MFI = MF.getFrameInfo();
732  int Offset = 0;
733
734  if (MFI->getNumObjects() == 0) {
735    return -1;
736  }
737
738  if (MRI.livein_empty()) {
739    return 0;
740  }
741
742  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
743                                            LE = MRI.livein_end();
744                                            LI != LE; ++LI) {
745    Offset = std::max(Offset,
746                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
747  }
748
749  return Offset + 1;
750}
751
752int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
753  int Offset = 0;
754  const MachineFrameInfo *MFI = MF.getFrameInfo();
755
756  // Variable sized objects are not supported
757  assert(!MFI->hasVarSizedObjects());
758
759  if (MFI->getNumObjects() == 0) {
760    return -1;
761  }
762
763  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
764
765  return getIndirectIndexBegin(MF) + Offset;
766}
767
768std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
769                                             const MachineFunction &MF) const {
770  const AMDGPUFrameLowering *TFL =
771                 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
772  std::vector<unsigned> Regs;
773
774  unsigned StackWidth = TFL->getStackWidth(MF);
775  int End = getIndirectIndexEnd(MF);
776
777  if (End == -1) {
778    return Regs;
779  }
780
781  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
782    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
783    Regs.push_back(SuperReg);
784    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
785      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
786      Regs.push_back(Reg);
787    }
788  }
789  return Regs;
790}
791
792unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
793                                                 unsigned Channel) const {
794  // XXX: Remove when we support a stack width > 2
795  assert(Channel == 0);
796  return RegIndex;
797}
798
799const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
800                                                     unsigned SourceReg) const {
801  return &AMDGPU::R600_TReg32RegClass;
802}
803
804const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
805  return &AMDGPU::TRegMemRegClass;
806}
807
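// Emit an indirect write: a MOVA_INT (with write disabled) loads the offset
// into AR_X, then ValueReg is stored through a MOV whose destination uses
// relative addressing.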
808MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
809                                       MachineBasicBlock::iterator I,
810                                       unsigned ValueReg, unsigned Address,
811                                       unsigned OffsetReg) const {
812  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
813  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
814                                               AMDGPU::AR_X, OffsetReg);
815  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
816
817  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
818                                      AddrReg, ValueReg)
819                                      .addReg(AMDGPU::AR_X,
820                                           RegState::Implicit | RegState::Kill);
821  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
822  return Mov;
823}
824
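// Emit an indirect read: same MOVA_INT sequence as above, but with relative
// addressing applied to src0 instead of the destination.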
825MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
826                                       MachineBasicBlock::iterator I,
827                                       unsigned ValueReg, unsigned Address,
828                                       unsigned OffsetReg) const {
829  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
830  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
831                                                       AMDGPU::AR_X,
832                                                       OffsetReg);
833  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
834  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
835                                      ValueReg,
836                                      AddrReg)
837                                      .addReg(AMDGPU::AR_X,
838                                           RegState::Implicit | RegState::Kill);
839  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
840
841  return Mov;
842}
843
844const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
845  return &AMDGPU::IndirectRegRegClass;
846}
847
848unsigned R600InstrInfo::getMaxAlusPerClause() const {
849  return 115;
850}
851
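// Build a canonical ALU instruction with every flag operand set to its
// default value. Two-source opcodes additionally get the update_exec_mask and
// update_predicate operands.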
852MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
853                                                  MachineBasicBlock::iterator I,
854                                                  unsigned Opcode,
855                                                  unsigned DstReg,
856                                                  unsigned Src0Reg,
857                                                  unsigned Src1Reg) const {
858  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
859    DstReg);           // $dst
860
861  if (Src1Reg) {
862    MIB.addImm(0)     // $update_exec_mask
863       .addImm(0);    // $update_predicate
864  }
865  MIB.addImm(1)        // $write
866     .addImm(0)        // $omod
867     .addImm(0)        // $dst_rel
868     .addImm(0)        // $dst_clamp
869     .addReg(Src0Reg)  // $src0
870     .addImm(0)        // $src0_neg
871     .addImm(0)        // $src0_rel
872     .addImm(0)        // $src0_abs
873     .addImm(-1);       // $src0_sel
874
875  if (Src1Reg) {
876    MIB.addReg(Src1Reg) // $src1
877       .addImm(0)       // $src1_neg
878       .addImm(0)       // $src1_rel
879       .addImm(0)       // $src1_abs
880       .addImm(-1);      // $src1_sel
881  }
882
  // XXX: The r600g finalizer expects this to be 1. Once we've moved
  // scheduling to the backend, we can change the default to 0.
885  MIB.addImm(1)        // $last
886      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
887      .addImm(0)         // $literal
888      .addImm(0);        // $bank_swizzle
889
890  return MIB;
891}
892
893#define OPERAND_CASE(Label) \
894  case Label: { \
895    static const unsigned Ops[] = \
896    { \
897      Label##_X, \
898      Label##_Y, \
899      Label##_Z, \
900      Label##_W \
901    }; \
902    return Ops[Slot]; \
903  }
904
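// Map a per-slot operand name (e.g. src0) to its X/Y/Z/W variant for the
// given DOT_4 slot.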
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
906  switch (Op) {
907  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
908  OPERAND_CASE(AMDGPU::OpName::update_pred)
909  OPERAND_CASE(AMDGPU::OpName::write)
910  OPERAND_CASE(AMDGPU::OpName::omod)
911  OPERAND_CASE(AMDGPU::OpName::dst_rel)
912  OPERAND_CASE(AMDGPU::OpName::clamp)
913  OPERAND_CASE(AMDGPU::OpName::src0)
914  OPERAND_CASE(AMDGPU::OpName::src0_neg)
915  OPERAND_CASE(AMDGPU::OpName::src0_rel)
916  OPERAND_CASE(AMDGPU::OpName::src0_abs)
917  OPERAND_CASE(AMDGPU::OpName::src0_sel)
918  OPERAND_CASE(AMDGPU::OpName::src1)
919  OPERAND_CASE(AMDGPU::OpName::src1_neg)
920  OPERAND_CASE(AMDGPU::OpName::src1_rel)
921  OPERAND_CASE(AMDGPU::OpName::src1_abs)
922  OPERAND_CASE(AMDGPU::OpName::src1_sel)
923  OPERAND_CASE(AMDGPU::OpName::pred_sel)
924  default:
925    llvm_unreachable("Wrong Operand");
926  }
927}
928
929#undef OPERAND_CASE
930
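// Expand one slot of a DOT_4 pseudo into a standalone DOT4 instruction,
// copying all per-slot flag operands from the vector instruction.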
931MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
932    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
933    const {
934  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
935  unsigned Opcode;
936  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
937  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
938    Opcode = AMDGPU::DOT4_r600;
939  else
940    Opcode = AMDGPU::DOT4_eg;
941  MachineBasicBlock::iterator I = MI;
942  MachineOperand &Src0 = MI->getOperand(
943      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
944  MachineOperand &Src1 = MI->getOperand(
945      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
946  MachineInstr *MIB = buildDefaultInstruction(
947      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
948  static const unsigned  Operands[14] = {
949    AMDGPU::OpName::update_exec_mask,
950    AMDGPU::OpName::update_pred,
951    AMDGPU::OpName::write,
952    AMDGPU::OpName::omod,
953    AMDGPU::OpName::dst_rel,
954    AMDGPU::OpName::clamp,
955    AMDGPU::OpName::src0_neg,
956    AMDGPU::OpName::src0_rel,
957    AMDGPU::OpName::src0_abs,
958    AMDGPU::OpName::src0_sel,
959    AMDGPU::OpName::src1_neg,
960    AMDGPU::OpName::src1_rel,
961    AMDGPU::OpName::src1_abs,
962    AMDGPU::OpName::src1_sel,
963  };
964
965  for (unsigned i = 0; i < 14; i++) {
966    MachineOperand &MO = MI->getOperand(
967        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
968    assert (MO.isImm());
969    setImmOperand(MIB, Operands[i], MO.getImm());
970  }
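  // Operand 20 is assumed to be the trailing $bank_swizzle operand appended
  // by buildDefaultInstruction; reset it to its default value.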
971  MIB->getOperand(20).setImm(0);
972  return MIB;
973}
974
975MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
976                                         MachineBasicBlock::iterator I,
977                                         unsigned DstReg,
978                                         uint64_t Imm) const {
979  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
980                                                  AMDGPU::ALU_LITERAL_X);
981  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
982  return MovImm;
983}
984
985int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
986  return getOperandIdx(MI.getOpcode(), Op);
987}
988
989int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
990  return AMDGPU::getNamedOperandIdx(Opcode, Op);
991}
992
993void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
994                                  int64_t Imm) const {
995  int Idx = getOperandIdx(*MI, Op);
996  assert(Idx != -1 && "Operand not supported for this instruction.");
997  assert(MI->getOperand(Idx).isImm());
998  MI->getOperand(Idx).setImm(Imm);
999}
1000
1001//===----------------------------------------------------------------------===//
1002// Instruction flag getters/setters
1003//===----------------------------------------------------------------------===//
1004
1005bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
1006  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
1007}
1008
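// Return the operand that holds the requested instruction flag. For natively
// encoded instructions the flag lives in a dedicated operand selected by
// Flag and SrcIdx; otherwise it is packed into the single flag operand.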
1009MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
1010                                         unsigned Flag) const {
1011  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1012  int FlagIndex = 0;
1013  if (Flag != 0) {
1014    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
1016    // that uses native encoding.
1017    assert(HAS_NATIVE_OPERANDS(TargetFlags));
1018    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
1019    switch (Flag) {
1020    case MO_FLAG_CLAMP:
1021      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
1022      break;
1023    case MO_FLAG_MASK:
1024      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
1025      break;
1026    case MO_FLAG_NOT_LAST:
1027    case MO_FLAG_LAST:
1028      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
1029      break;
1030    case MO_FLAG_NEG:
1031      switch (SrcIdx) {
1032      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
1033      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
1034      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
1035      }
1036      break;
1037
1038    case MO_FLAG_ABS:
1039      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
1040                       "instructions.");
1041      (void)IsOP3;
1042      switch (SrcIdx) {
1043      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
1044      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
1045      }
1046      break;
1047
1048    default:
1049      FlagIndex = -1;
1050      break;
1051    }
1052    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }
1058
1059  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
1060  assert(FlagOp.isImm());
1061  return FlagOp;
1062}
1063
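// Set an instruction flag. MO_FLAG_NOT_LAST and MO_FLAG_MASK are expressed by
// clearing the corresponding native operand rather than setting it.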
1064void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
1065                            unsigned Flag) const {
1066  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1067  if (Flag == 0) {
1068    return;
1069  }
1070  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1071    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1072    if (Flag == MO_FLAG_NOT_LAST) {
1073      clearFlag(MI, Operand, MO_FLAG_LAST);
1074    } else if (Flag == MO_FLAG_MASK) {
1075      clearFlag(MI, Operand, Flag);
1076    } else {
1077      FlagOp.setImm(1);
1078    }
1079  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
1082  }
1083}
1084
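// Clear an instruction flag, either by zeroing the native flag operand or by
// masking the bit out of the packed flag operand.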
1085void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
1086                              unsigned Flag) const {
1087  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1088  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1089    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1090    FlagOp.setImm(0);
1091  } else {
1092    MachineOperand &FlagOp = getFlagOp(MI);
1093    unsigned InstFlags = FlagOp.getImm();
1094    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
1095    FlagOp.setImm(InstFlags);
1096  }
1097}
1098