R600InstrInfo.cpp revision b21ab43cfc3fa0dacf5c95f04e58b6d804b59a16
1//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Implementation of TargetInstrInfo.
12//
13//===----------------------------------------------------------------------===//
14
15#include "R600InstrInfo.h"
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "AMDGPUTargetMachine.h"
19#include "R600Defines.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25
26#define GET_INSTRINFO_CTOR
27#include "AMDGPUGenDFAPacketizer.inc"
28
29using namespace llvm;
30
31R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
32  : AMDGPUInstrInfo(tm),
33    RI(tm),
34    ST(tm.getSubtarget<AMDGPUSubtarget>())
35  { }
36
37const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
38  return RI;
39}
40
41bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
42  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
43}
44
45bool R600InstrInfo::isVector(const MachineInstr &MI) const {
46  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
47}
48
49void
50R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
51                           MachineBasicBlock::iterator MI, DebugLoc DL,
52                           unsigned DestReg, unsigned SrcReg,
53                           bool KillSrc) const {
54  unsigned VectorComponents = 0;
55  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
56      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
57    VectorComponents = 4;
58  } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
59            AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
60    VectorComponents = 2;
61  }
62
63  if (VectorComponents > 0) {
64    for (unsigned I = 0; I < VectorComponents; I++) {
65      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
66      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
67                              RI.getSubReg(DestReg, SubRegIndex),
68                              RI.getSubReg(SrcReg, SubRegIndex))
69                              .addReg(DestReg,
70                                      RegState::Define | RegState::Implicit);
71    }
72  } else {
73    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
74                                                  DestReg, SrcReg);
75    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
76                                    .setIsKill(KillSrc);
77  }
78}
79
80unsigned R600InstrInfo::getIEQOpcode() const {
81  return AMDGPU::SETE_INT;
82}
83
84bool R600InstrInfo::isMov(unsigned Opcode) const {
85
86
87  switch(Opcode) {
88  default: return false;
89  case AMDGPU::MOV:
90  case AMDGPU::MOV_IMM_F32:
91  case AMDGPU::MOV_IMM_I32:
92    return true;
93  }
94}
95
96// Some instructions act as placeholders to emulate operations that the GPU
97// hardware does automatically. This function can be used to check if
98// an opcode falls into this category.
99bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
100  switch (Opcode) {
101  default: return false;
102  case AMDGPU::RETURN:
103    return true;
104  }
105}
106
107bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
108  return false;
109}
110
111bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
112  switch(Opcode) {
113    default: return false;
114    case AMDGPU::CUBE_r600_pseudo:
115    case AMDGPU::CUBE_r600_real:
116    case AMDGPU::CUBE_eg_pseudo:
117    case AMDGPU::CUBE_eg_real:
118      return true;
119  }
120}
121
122bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
123  unsigned TargetFlags = get(Opcode).TSFlags;
124
125  return (TargetFlags & R600_InstFlag::ALU_INST);
126}
127
128bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
129  unsigned TargetFlags = get(Opcode).TSFlags;
130
131  return ((TargetFlags & R600_InstFlag::OP1) |
132          (TargetFlags & R600_InstFlag::OP2) |
133          (TargetFlags & R600_InstFlag::OP3));
134}
135
136bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
137  unsigned TargetFlags = get(Opcode).TSFlags;
138
139  return ((TargetFlags & R600_InstFlag::LDS_1A) |
140          (TargetFlags & R600_InstFlag::LDS_1A1D) |
141          (TargetFlags & R600_InstFlag::LDS_1A2D));
142}
143
144bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
145  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
146}
147
148bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
149  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
150}
151
152bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
153  if (isALUInstr(MI->getOpcode()))
154    return true;
155  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
156    return true;
157  switch (MI->getOpcode()) {
158  case AMDGPU::PRED_X:
159  case AMDGPU::INTERP_PAIR_XY:
160  case AMDGPU::INTERP_PAIR_ZW:
161  case AMDGPU::INTERP_VEC_LOAD:
162  case AMDGPU::COPY:
163  case AMDGPU::DOT_4:
164    return true;
165  default:
166    return false;
167  }
168}
169
170bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
171  if (ST.hasCaymanISA())
172    return false;
173  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
174}
175
176bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
177  return isTransOnly(MI->getOpcode());
178}
179
180bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
181  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
182}
183
184bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
185  return isVectorOnly(MI->getOpcode());
186}
187
188bool R600InstrInfo::isExport(unsigned Opcode) const {
189  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
190}
191
192bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
193  return ST.hasVertexCache() && IS_VTX(get(Opcode));
194}
195
196bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
197  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
198  return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
199}
200
201bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
202  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
203}
204
205bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
206  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
207  return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
208         usesTextureCache(MI->getOpcode());
209}
210
211bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
212  switch (Opcode) {
213  case AMDGPU::KILLGT:
214  case AMDGPU::GROUP_BARRIER:
215    return true;
216  default:
217    return false;
218  }
219}
220
221bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
222  return  MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
223}
224
225bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
226  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
227}
228
229bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
230  if (!isALUInstr(MI->getOpcode())) {
231    return false;
232  }
233  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
234                                        E = MI->operands_end(); I != E; ++I) {
235    if (!I->isReg() || !I->isUse() ||
236        TargetRegisterInfo::isVirtualRegister(I->getReg()))
237      continue;
238
239    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
240      return true;
241  }
242  return false;
243}
244
245int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
246  static const unsigned OpTable[] = {
247    AMDGPU::OpName::src0,
248    AMDGPU::OpName::src1,
249    AMDGPU::OpName::src2
250  };
251
252  assert (SrcNum < 3);
253  return getOperandIdx(Opcode, OpTable[SrcNum]);
254}
255
256#define SRC_SEL_ROWS 11
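/// Returns the operand index of the *_sel operand paired with the source
/// operand whose operand index is \p SrcIdx, or -1 if there is none.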
257int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
258  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
259    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
260    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
261    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
262    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
263    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
264    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
265    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
266    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
267    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
268    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
269    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
270  };
271
272  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
273    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
274      return getOperandIdx(Opcode, SrcSelTable[i][1]);
275    }
276  }
277  return -1;
278}
279#undef SRC_SEL_ROWS
280
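/// Collects the explicit source operands of \p MI, pairing each one with the
/// constant-buffer selector for ALU_CONST sources, the literal value for
/// ALU_LITERAL_X sources, and 0 otherwise. For DOT_4 only the ALU_CONST
/// sources of each lane are returned.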
281SmallVector<std::pair<MachineOperand *, int64_t>, 3>
282R600InstrInfo::getSrcs(MachineInstr *MI) const {
283  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
284
285  if (MI->getOpcode() == AMDGPU::DOT_4) {
286    static const unsigned OpTable[8][2] = {
287      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
288      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
289      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
290      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
291      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
292      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
293      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
294      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
295    };
296
297    for (unsigned j = 0; j < 8; j++) {
298      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
299                                                        OpTable[j][0]));
300      unsigned Reg = MO.getReg();
301      if (Reg == AMDGPU::ALU_CONST) {
302        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
303                                                    OpTable[j][1])).getImm();
304        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
305        continue;
306      }
307
308    }
309    return Result;
310  }
311
312  static const unsigned OpTable[3][2] = {
313    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
314    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
315    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
316  };
317
318  for (unsigned j = 0; j < 3; j++) {
319    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
320    if (SrcIdx < 0)
321      break;
322    MachineOperand &MO = MI->getOperand(SrcIdx);
323    unsigned Reg = MI->getOperand(SrcIdx).getReg();
324    if (Reg == AMDGPU::ALU_CONST) {
325      unsigned Sel = MI->getOperand(
326          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
327      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
328      continue;
329    }
330    if (Reg == AMDGPU::ALU_LITERAL_X) {
331      unsigned Imm = MI->getOperand(
332          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
333      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
334      continue;
335    }
336    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
337  }
338  return Result;
339}
340
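/// Rewrites the sources of \p MI as (register index, channel) pairs for the
/// read port checks below: PS/PV operands become index 255, constant reads
/// increment \p ConstCount and become the (-1, 0) dummy pair, and the result
/// is padded with dummy pairs up to three entries.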
341std::vector<std::pair<int, unsigned> >
342R600InstrInfo::ExtractSrcs(MachineInstr *MI,
343                           const DenseMap<unsigned, unsigned> &PV,
344                           unsigned &ConstCount) const {
345  ConstCount = 0;
346  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
347  const std::pair<int, unsigned> DummyPair(-1, 0);
348  std::vector<std::pair<int, unsigned> > Result;
349  unsigned i = 0;
350  for (unsigned n = Srcs.size(); i < n; ++i) {
351    unsigned Reg = Srcs[i].first->getReg();
352    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
353    if (Reg == AMDGPU::OQAP) {
354      Result.push_back(std::pair<int, unsigned>(Index, 0));
355    }
356    if (PV.find(Reg) != PV.end()) {
357      // 255 is used to tell it's a PS/PV reg
358      Result.push_back(std::pair<int, unsigned>(255, 0));
359      continue;
360    }
361    if (Index > 127) {
362      ConstCount++;
363      Result.push_back(DummyPair);
364      continue;
365    }
366    unsigned Chan = RI.getHWRegChan(Reg);
367    Result.push_back(std::pair<int, unsigned>(Index, Chan));
368  }
369  for (; i < 3; ++i)
370    Result.push_back(DummyPair);
371  return Result;
372}
373
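// Applies the bank swizzle permutation to the three source slots; the slot
// position after swizzling corresponds to the read cycle checked in
// isLegalUpTo(). For example ALU_VEC_102_SCL_221 swaps slots 0 and 1 and
// ALU_VEC_210 swaps slots 0 and 2. If src0 and src1 are identical, the
// duplicate is dropped since it does not need a second read port.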
374static std::vector<std::pair<int, unsigned> >
375Swizzle(std::vector<std::pair<int, unsigned> > Src,
376        R600InstrInfo::BankSwizzle Swz) {
377  if (Src[0] == Src[1])
378    Src[1].first = -1;
379  switch (Swz) {
380  case R600InstrInfo::ALU_VEC_012_SCL_210:
381    break;
382  case R600InstrInfo::ALU_VEC_021_SCL_122:
383    std::swap(Src[1], Src[2]);
384    break;
385  case R600InstrInfo::ALU_VEC_102_SCL_221:
386    std::swap(Src[0], Src[1]);
387    break;
388  case R600InstrInfo::ALU_VEC_120_SCL_212:
389    std::swap(Src[0], Src[1]);
390    std::swap(Src[0], Src[2]);
391    break;
392  case R600InstrInfo::ALU_VEC_201:
393    std::swap(Src[0], Src[2]);
394    std::swap(Src[0], Src[1]);
395    break;
396  case R600InstrInfo::ALU_VEC_210:
397    std::swap(Src[0], Src[2]);
398    break;
399  }
400  return Src;
401}
402
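// For an instruction in the Trans slot, returns the cycle in which source
// operand Op is read under the given bank swizzle; the SCL_xyz suffix of the
// swizzle name mirrors the per-operand read cycles.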
403static unsigned
404getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
405  switch (Swz) {
406  case R600InstrInfo::ALU_VEC_012_SCL_210: {
407    unsigned Cycles[3] = { 2, 1, 0};
408    return Cycles[Op];
409  }
410  case R600InstrInfo::ALU_VEC_021_SCL_122: {
411    unsigned Cycles[3] = { 1, 2, 2};
412    return Cycles[Op];
413  }
414  case R600InstrInfo::ALU_VEC_120_SCL_212: {
415    unsigned Cycles[3] = { 2, 1, 2};
416    return Cycles[Op];
417  }
418  case R600InstrInfo::ALU_VEC_102_SCL_221: {
419    unsigned Cycles[3] = { 2, 2, 1};
420    return Cycles[Op];
421  }
422  default:
423    llvm_unreachable("Wrong Swizzle for Trans Slot");
424    return 0;
425  }
426}
427
428/// Returns how many MIs (whose inputs are represented by IGSrcs) can be packed
429/// in the same Instruction Group while meeting read port limitations given a
430/// Swz swizzle sequence.
431unsigned  R600InstrInfo::isLegalUpTo(
432    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
433    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
434    const std::vector<std::pair<int, unsigned> > &TransSrcs,
435    R600InstrInfo::BankSwizzle TransSwz) const {
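  // Vector[Chan][Cycle] records which register index is read on each read
  // port; reading a second, different index on the same port in the same
  // cycle is a conflict.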
436  int Vector[4][3];
437  memset(Vector, -1, sizeof(Vector));
438  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
439    const std::vector<std::pair<int, unsigned> > &Srcs =
440        Swizzle(IGSrcs[i], Swz[i]);
441    for (unsigned j = 0; j < 3; j++) {
442      const std::pair<int, unsigned> &Src = Srcs[j];
443      if (Src.first < 0 || Src.first == 255)
444        continue;
445      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
446        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
447            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
448            // The value from output queue A (denoted by register OQAP) can
449            // only be fetched during the first cycle.
450            return false;
451        }
452        // OQAP does not count towards the normal read port restrictions
453        continue;
454      }
455      if (Vector[Src.second][j] < 0)
456        Vector[Src.second][j] = Src.first;
457      if (Vector[Src.second][j] != Src.first)
458        return i;
459    }
460  }
461  // Now check the Trans ALU
462  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
463    const std::pair<int, unsigned> &Src = TransSrcs[i];
464    unsigned Cycle = getTransSwizzle(TransSwz, i);
465    if (Src.first < 0)
466      continue;
467    if (Src.first == 255)
468      continue;
469    if (Vector[Src.second][Cycle] < 0)
470      Vector[Src.second][Cycle] = Src.first;
471    if (Vector[Src.second][Cycle] != Src.first)
472      return IGSrcs.size() - 1;
473  }
474  return IGSrcs.size();
475}
476
477/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
478/// (in lexicographic order) swizzle sequence, assuming that all swizzles after
479/// Idx can be skipped.
480static bool
481NextPossibleSolution(
482    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
483    unsigned Idx) {
484  assert(Idx < SwzCandidate.size());
485  int ResetIdx = Idx;
486  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
487    ResetIdx --;
488  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
489    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
490  }
491  if (ResetIdx == -1)
492    return false;
493  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
494  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
495  return true;
496}
497
498/// Enumerate all possible swizzle sequences to find one that meets all
499/// read port requirements.
500bool R600InstrInfo::FindSwizzleForVectorSlot(
501    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
502    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
503    const std::vector<std::pair<int, unsigned> > &TransSrcs,
504    R600InstrInfo::BankSwizzle TransSwz) const {
505  unsigned ValidUpTo = 0;
506  do {
507    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
508    if (ValidUpTo == IGSrcs.size())
509      return true;
510  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
511  return false;
512}
513
514/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also read
515/// a constant, and can't read a GPR at cycle 1 if they read two constants.
516static bool
517isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
518                  const std::vector<std::pair<int, unsigned> > &TransOps,
519                  unsigned ConstCount) {
520  // TransALU can't read 3 constants
521  if (ConstCount > 2)
522    return false;
523  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
524    const std::pair<int, unsigned> &Src = TransOps[i];
525    unsigned Cycle = getTransSwizzle(TransSwz, i);
526    if (Src.first < 0)
527      continue;
528    if (ConstCount > 0 && Cycle == 0)
529      return false;
530    if (ConstCount > 1 && Cycle == 1)
531      return false;
532  }
533  return true;
534}
535
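/// Checks whether the instruction group \p IG can be scheduled together under
/// the GPR read port limitations. The bank_swizzle already carried by each
/// instruction is used as the starting candidate; when the last instruction
/// occupies the Trans slot, each of the four Trans-legal swizzles is tried for
/// it in turn. On success the chosen swizzles are returned in \p ValidSwizzle.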
536bool
537R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
538                                       const DenseMap<unsigned, unsigned> &PV,
539                                       std::vector<BankSwizzle> &ValidSwizzle,
540                                       bool isLastAluTrans)
541    const {
542  // TODO: support shared src0 - src1 operand
543
544  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
545  ValidSwizzle.clear();
546  unsigned ConstCount;
547  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
548  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
549    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
550    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
551        AMDGPU::OpName::bank_swizzle);
552    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
553        IG[i]->getOperand(Op).getImm());
554  }
555  std::vector<std::pair<int, unsigned> > TransOps;
556  if (!isLastAluTrans)
557    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
558
559  TransOps = IGSrcs.back();
560  IGSrcs.pop_back();
561  ValidSwizzle.pop_back();
562
563  static const R600InstrInfo::BankSwizzle TransSwz[] = {
564    ALU_VEC_012_SCL_210,
565    ALU_VEC_021_SCL_122,
566    ALU_VEC_120_SCL_212,
567    ALU_VEC_102_SCL_221
568  };
569  for (unsigned i = 0; i < 4; i++) {
570    TransBS = TransSwz[i];
571    if (!isConstCompatible(TransBS, TransOps, ConstCount))
572      continue;
573    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
574        TransBS);
575    if (Result) {
576      ValidSwizzle.push_back(TransBS);
577      return true;
578    }
579  }
580
581  return false;
582}
583
584
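/// Each entry in \p Consts encodes a constant read as (Index << 2) | Chan (see
/// the MachineInstr overload below). Discarding the low channel bit collapses
/// X/Y and Z/W reads of the same constant slot into one "half"; the group is
/// rejected if it reads from more than two distinct halves.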
585bool
586R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
587    const {
588  assert (Consts.size() <= 12 && "Too many operands in instructions group");
589  unsigned Pair1 = 0, Pair2 = 0;
590  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
591    unsigned ReadConstHalf = Consts[i] & 2;
592    unsigned ReadConstIndex = Consts[i] & (~3);
593    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
594    if (!Pair1) {
595      Pair1 = ReadHalfConst;
596      continue;
597    }
598    if (Pair1 == ReadHalfConst)
599      continue;
600    if (!Pair2) {
601      Pair2 = ReadHalfConst;
602      continue;
603    }
604    if (Pair2 != ReadHalfConst)
605      return false;
606  }
607  return true;
608}
609
610bool
611R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
612    const {
613  std::vector<unsigned> Consts;
614  SmallSet<int64_t, 4> Literals;
615  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
616    MachineInstr *MI = MIs[i];
617    if (!isALUInstr(MI->getOpcode()))
618      continue;
619
620    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
621        getSrcs(MI);
622
623    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
624      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
625      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
626        Literals.insert(Src.second);
627      if (Literals.size() > 4)
628        return false;
629      if (Src.first->getReg() == AMDGPU::ALU_CONST)
630        Consts.push_back(Src.second);
631      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
632          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
633        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
634        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
635        Consts.push_back((Index << 2) | Chan);
636      }
637    }
638  }
639  return fitsConstReadLimitations(Consts);
640}
641
642DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
643    const ScheduleDAG *DAG) const {
644  const InstrItineraryData *II = TM->getInstrItineraryData();
645  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
646}
647
648static bool
649isPredicateSetter(unsigned Opcode) {
650  switch (Opcode) {
651  case AMDGPU::PRED_X:
652    return true;
653  default:
654    return false;
655  }
656}
657
658static MachineInstr *
659findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
660                             MachineBasicBlock::iterator I) {
661  while (I != MBB.begin()) {
662    --I;
663    MachineInstr *MI = I;
664    if (isPredicateSetter(MI->getOpcode()))
665      return MI;
666  }
667
668  return NULL;
669}
670
671static
672bool isJump(unsigned Opcode) {
673  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
674}
675
676static bool isBranch(unsigned Opcode) {
677  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
678      Opcode == AMDGPU::BRANCH_COND_f32;
679}
680
681bool
682R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
683                             MachineBasicBlock *&TBB,
684                             MachineBasicBlock *&FBB,
685                             SmallVectorImpl<MachineOperand> &Cond,
686                             bool AllowModify) const {
687  // Most of the following comes from the ARM implementation of AnalyzeBranch
688
689  // If the block has no terminators, it just falls into the block after it.
690  MachineBasicBlock::iterator I = MBB.end();
691  if (I == MBB.begin())
692    return false;
693  --I;
694  while (I->isDebugValue()) {
695    if (I == MBB.begin())
696      return false;
697    --I;
698  }
699  // AMDGPU::BRANCH* instructions are only available after isel and are not
700  // handled
701  if (isBranch(I->getOpcode()))
702    return true;
703  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
704    return false;
705  }
706
707  // Get the last instruction in the block.
708  MachineInstr *LastInst = I;
709
710  // If there is only one terminator instruction, process it.
711  unsigned LastOpc = LastInst->getOpcode();
712  if (I == MBB.begin() ||
713          !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
714    if (LastOpc == AMDGPU::JUMP) {
715      TBB = LastInst->getOperand(0).getMBB();
716      return false;
717    } else if (LastOpc == AMDGPU::JUMP_COND) {
718      MachineInstr *predSet = I;
719      while (!isPredicateSetter(predSet->getOpcode())) {
720        predSet = --I;
721      }
722      TBB = LastInst->getOperand(0).getMBB();
723      Cond.push_back(predSet->getOperand(1));
724      Cond.push_back(predSet->getOperand(2));
725      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
726      return false;
727    }
728    return true;  // Can't handle indirect branch.
729  }
730
731  // Get the instruction before it if it is a terminator.
732  MachineInstr *SecondLastInst = I;
733  unsigned SecondLastOpc = SecondLastInst->getOpcode();
734
735  // If the block ends with a B and a Bcc, handle it.
736  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
737    MachineInstr *predSet = --I;
738    while (!isPredicateSetter(predSet->getOpcode())) {
739      predSet = --I;
740    }
741    TBB = SecondLastInst->getOperand(0).getMBB();
742    FBB = LastInst->getOperand(0).getMBB();
743    Cond.push_back(predSet->getOperand(1));
744    Cond.push_back(predSet->getOperand(2));
745    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
746    return false;
747  }
748
749  // Otherwise, can't handle this.
750  return true;
751}
752
753int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
754  const MachineInstr *MI = op.getParent();
755
756  switch (MI->getDesc().OpInfo->RegClass) {
757  default: // FIXME: fallthrough??
758  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
759  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
760  };
761}
762
763static
764MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
765  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
766      It != E; ++It) {
767    if (It->getOpcode() == AMDGPU::CF_ALU ||
768        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
769      return llvm::prior(It.base());
770  }
771  return MBB.end();
772}
773
774unsigned
775R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
776                            MachineBasicBlock *TBB,
777                            MachineBasicBlock *FBB,
778                            const SmallVectorImpl<MachineOperand> &Cond,
779                            DebugLoc DL) const {
780  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
781
782  if (FBB == 0) {
783    if (Cond.empty()) {
784      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
785      return 1;
786    } else {
787      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
788      assert(PredSet && "No previous predicate !");
789      addFlag(PredSet, 0, MO_FLAG_PUSH);
790      PredSet->getOperand(2).setImm(Cond[1].getImm());
791
792      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
793             .addMBB(TBB)
794             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
795      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
796      if (CfAlu == MBB.end())
797        return 1;
798      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
799      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
800      return 1;
801    }
802  } else {
803    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
804    assert(PredSet && "No previous predicate !");
805    addFlag(PredSet, 0, MO_FLAG_PUSH);
806    PredSet->getOperand(2).setImm(Cond[1].getImm());
807    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
808            .addMBB(TBB)
809            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
810    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
811    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
812    if (CfAlu == MBB.end())
813      return 2;
814    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
815    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
816    return 2;
817  }
818}
819
820unsigned
821R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
822
823  // Note: we leave PRED* instructions there.
824  // They may be needed when predicating instructions.
825
826  MachineBasicBlock::iterator I = MBB.end();
827
828  if (I == MBB.begin()) {
829    return 0;
830  }
831  --I;
832  switch (I->getOpcode()) {
833  default:
834    return 0;
835  case AMDGPU::JUMP_COND: {
836    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
837    clearFlag(predSet, 0, MO_FLAG_PUSH);
838    I->eraseFromParent();
839    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
840    if (CfAlu == MBB.end())
841      break;
842    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
843    CfAlu->setDesc(get(AMDGPU::CF_ALU));
844    break;
845  }
846  case AMDGPU::JUMP:
847    I->eraseFromParent();
848    break;
849  }
850  I = MBB.end();
851
852  if (I == MBB.begin()) {
853    return 1;
854  }
855  --I;
856  switch (I->getOpcode()) {
857    // FIXME: only one case??
858  default:
859    return 1;
860  case AMDGPU::JUMP_COND: {
861    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
862    clearFlag(predSet, 0, MO_FLAG_PUSH);
863    I->eraseFromParent();
864    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
865    if (CfAlu == MBB.end())
866      break;
867    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
868    CfAlu->setDesc(get(AMDGPU::CF_ALU));
869    break;
870  }
871  case AMDGPU::JUMP:
872    I->eraseFromParent();
873    break;
874  }
875  return 2;
876}
877
878bool
879R600InstrInfo::isPredicated(const MachineInstr *MI) const {
880  int idx = MI->findFirstPredOperandIdx();
881  if (idx < 0)
882    return false;
883
884  unsigned Reg = MI->getOperand(idx).getReg();
885  switch (Reg) {
886  default: return false;
887  case AMDGPU::PRED_SEL_ONE:
888  case AMDGPU::PRED_SEL_ZERO:
889  case AMDGPU::PREDICATE_BIT:
890    return true;
891  }
892}
893
894bool
895R600InstrInfo::isPredicable(MachineInstr *MI) const {
896  // XXX: KILL* instructions can be predicated, but they must be the last
897  // instruction in a clause, so this means any instructions after them cannot
898  // be predicated.  Until we have proper support for instruction clauses in the
899  // backend, we will mark KILL* instructions as unpredicable.
900
901  if (MI->getOpcode() == AMDGPU::KILLGT) {
902    return false;
903  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
904    // If the clause starts in the middle of the MBB then the MBB has more
905    // than a single clause; we are unable to predicate several clauses.
906    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
907      return false;
908    // TODO: We don't support KC merging atm
909    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
910      return false;
911    return true;
912  } else if (isVector(*MI)) {
913    return false;
914  } else {
915    return AMDGPUInstrInfo::isPredicable(MI);
916  }
917}
918
919
920bool
921R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
922                                   unsigned NumCyles,
923                                   unsigned ExtraPredCycles,
924                                   const BranchProbability &Probability) const{
925  return true;
926}
927
928bool
929R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
930                                   unsigned NumTCycles,
931                                   unsigned ExtraTCycles,
932                                   MachineBasicBlock &FMBB,
933                                   unsigned NumFCycles,
934                                   unsigned ExtraFCycles,
935                                   const BranchProbability &Probability) const {
936  return true;
937}
938
939bool
940R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
941                                         unsigned NumCyles,
942                                         const BranchProbability &Probability)
943                                         const {
944  return true;
945}
946
947bool
948R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
949                                         MachineBasicBlock &FMBB) const {
950  return false;
951}
952
953
954bool
955R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
956  MachineOperand &MO = Cond[1];
957  switch (MO.getImm()) {
958  case OPCODE_IS_ZERO_INT:
959    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
960    break;
961  case OPCODE_IS_NOT_ZERO_INT:
962    MO.setImm(OPCODE_IS_ZERO_INT);
963    break;
964  case OPCODE_IS_ZERO:
965    MO.setImm(OPCODE_IS_NOT_ZERO);
966    break;
967  case OPCODE_IS_NOT_ZERO:
968    MO.setImm(OPCODE_IS_ZERO);
969    break;
970  default:
971    return true;
972  }
973
974  MachineOperand &MO2 = Cond[2];
975  switch (MO2.getReg()) {
976  case AMDGPU::PRED_SEL_ZERO:
977    MO2.setReg(AMDGPU::PRED_SEL_ONE);
978    break;
979  case AMDGPU::PRED_SEL_ONE:
980    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
981    break;
982  default:
983    return true;
984  }
985  return false;
986}
987
988bool
989R600InstrInfo::DefinesPredicate(MachineInstr *MI,
990                                std::vector<MachineOperand> &Pred) const {
991  return isPredicateSetter(MI->getOpcode());
992}
993
994
995bool
996R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
997                       const SmallVectorImpl<MachineOperand> &Pred2) const {
998  return false;
999}
1000
1001
1002bool
1003R600InstrInfo::PredicateInstruction(MachineInstr *MI,
1004                      const SmallVectorImpl<MachineOperand> &Pred) const {
1005  int PIdx = MI->findFirstPredOperandIdx();
1006
1007  if (MI->getOpcode() == AMDGPU::CF_ALU) {
1008    MI->getOperand(8).setImm(0);
1009    return true;
1010  }
1011
1012  if (MI->getOpcode() == AMDGPU::DOT_4) {
1013    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
1014        .setReg(Pred[2].getReg());
1015    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
1016        .setReg(Pred[2].getReg());
1017    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
1018        .setReg(Pred[2].getReg());
1019    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
1020        .setReg(Pred[2].getReg());
1021    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1022    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1023    return true;
1024  }
1025
1026  if (PIdx != -1) {
1027    MachineOperand &PMO = MI->getOperand(PIdx);
1028    PMO.setReg(Pred[2].getReg());
1029    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1030    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1031    return true;
1032  }
1033
1034  return false;
1035}
1036
1037unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
1038  return 2;
1039}
1040
1041unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
1042                                            const MachineInstr *MI,
1043                                            unsigned *PredCost) const {
1044  if (PredCost)
1045    *PredCost = 2;
1046  return 2;
1047}
1048
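// Reserves every register that may be touched by indirect addressing: the
// 128-bit super-register of each indirect index plus its first StackWidth
// 32-bit channels.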
1049void  R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1050                                             const MachineFunction &MF) const {
1051  const AMDGPUFrameLowering *TFL =
1052                 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
1053
1054  unsigned StackWidth = TFL->getStackWidth(MF);
1055  int End = getIndirectIndexEnd(MF);
1056
1057  if (End == -1)
1058    return;
1059
1060  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
1061    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
1062    Reserved.set(SuperReg);
1063    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
1064      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
1065      Reserved.set(Reg);
1066    }
1067  }
1068}
1069
1070unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
1071                                                 unsigned Channel) const {
1072  // XXX: Remove when we support a stack width > 2
1073  assert(Channel == 0);
1074  return RegIndex;
1075}
1076
1077const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
1078  return &AMDGPU::R600_TReg32_XRegClass;
1079}
1080
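// Indirect writes (and reads, below) are lowered to a MOVA_INT that loads the
// offset into AR_X with its own result write disabled, followed by a MOV whose
// destination (resp. source) operand is marked relative so that it is
// addressed through AR_X.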
1081MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1082                                       MachineBasicBlock::iterator I,
1083                                       unsigned ValueReg, unsigned Address,
1084                                       unsigned OffsetReg) const {
1085  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
1086  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1087                                               AMDGPU::AR_X, OffsetReg);
1088  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1089
1090  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1091                                      AddrReg, ValueReg)
1092                                      .addReg(AMDGPU::AR_X,
1093                                           RegState::Implicit | RegState::Kill);
1094  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
1095  return Mov;
1096}
1097
1098MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1099                                       MachineBasicBlock::iterator I,
1100                                       unsigned ValueReg, unsigned Address,
1101                                       unsigned OffsetReg) const {
1102  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
1103  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1104                                                       AMDGPU::AR_X,
1105                                                       OffsetReg);
1106  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1107  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1108                                      ValueReg,
1109                                      AddrReg)
1110                                      .addReg(AMDGPU::AR_X,
1111                                           RegState::Implicit | RegState::Kill);
1112  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
1113
1114  return Mov;
1115}
1116
1117unsigned R600InstrInfo::getMaxAlusPerClause() const {
1118  return 115;
1119}
1120
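// Emits Opcode with the full explicit ALU operand list filled with neutral
// defaults: no modifiers, write enabled, src_sel = -1, last = 1 and
// pred_sel = PRED_SEL_OFF. The update_exec_mask/update_predicate operands are
// only present on two-source opcodes, hence they are added only when Src1Reg
// is given.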
1121MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
1122                                                  MachineBasicBlock::iterator I,
1123                                                  unsigned Opcode,
1124                                                  unsigned DstReg,
1125                                                  unsigned Src0Reg,
1126                                                  unsigned Src1Reg) const {
1127  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
1128    DstReg);           // $dst
1129
1130  if (Src1Reg) {
1131    MIB.addImm(0)     // $update_exec_mask
1132       .addImm(0);    // $update_predicate
1133  }
1134  MIB.addImm(1)        // $write
1135     .addImm(0)        // $omod
1136     .addImm(0)        // $dst_rel
1137     .addImm(0)        // $dst_clamp
1138     .addReg(Src0Reg)  // $src0
1139     .addImm(0)        // $src0_neg
1140     .addImm(0)        // $src0_rel
1141     .addImm(0)        // $src0_abs
1142     .addImm(-1);       // $src0_sel
1143
1144  if (Src1Reg) {
1145    MIB.addReg(Src1Reg) // $src1
1146       .addImm(0)       // $src1_neg
1147       .addImm(0)       // $src1_rel
1148       .addImm(0)       // $src1_abs
1149       .addImm(-1);      // $src1_sel
1150  }
1151
1152  // XXX: The r600g finalizer expects this to be 1; once we've moved the
1153  // scheduling to the backend, we can change the default to 0.
1154  MIB.addImm(1)        // $last
1155      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
1156      .addImm(0)         // $literal
1157      .addImm(0);        // $bank_swizzle
1158
1159  return MIB;
1160}
1161
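// OPERAND_CASE/getSlotedOps map a generic ALU operand name to its per-slot
// _X/_Y/_Z/_W variant, so that the DOT_4 expansion below can pick out the
// operands belonging to a single slot.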
1162#define OPERAND_CASE(Label) \
1163  case Label: { \
1164    static const unsigned Ops[] = \
1165    { \
1166      Label##_X, \
1167      Label##_Y, \
1168      Label##_Z, \
1169      Label##_W \
1170    }; \
1171    return Ops[Slot]; \
1172  }
1173
1174static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {
1175  switch (Op) {
1176  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
1177  OPERAND_CASE(AMDGPU::OpName::update_pred)
1178  OPERAND_CASE(AMDGPU::OpName::write)
1179  OPERAND_CASE(AMDGPU::OpName::omod)
1180  OPERAND_CASE(AMDGPU::OpName::dst_rel)
1181  OPERAND_CASE(AMDGPU::OpName::clamp)
1182  OPERAND_CASE(AMDGPU::OpName::src0)
1183  OPERAND_CASE(AMDGPU::OpName::src0_neg)
1184  OPERAND_CASE(AMDGPU::OpName::src0_rel)
1185  OPERAND_CASE(AMDGPU::OpName::src0_abs)
1186  OPERAND_CASE(AMDGPU::OpName::src0_sel)
1187  OPERAND_CASE(AMDGPU::OpName::src1)
1188  OPERAND_CASE(AMDGPU::OpName::src1_neg)
1189  OPERAND_CASE(AMDGPU::OpName::src1_rel)
1190  OPERAND_CASE(AMDGPU::OpName::src1_abs)
1191  OPERAND_CASE(AMDGPU::OpName::src1_sel)
1192  OPERAND_CASE(AMDGPU::OpName::pred_sel)
1193  default:
1194    llvm_unreachable("Wrong Operand");
1195  }
1196}
1197
1198#undef OPERAND_CASE
1199
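// Expands one slot of a DOT_4 pseudo into a standalone DOT4_r600/DOT4_eg
// instruction, copying the slot's sources, modifier flags and pred_sel from
// the corresponding _X/_Y/_Z/_W operands of the pseudo.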
1200MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
1201    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
1202    const {
1203  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
1204  unsigned Opcode;
1205  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
1206  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
1207    Opcode = AMDGPU::DOT4_r600;
1208  else
1209    Opcode = AMDGPU::DOT4_eg;
1210  MachineBasicBlock::iterator I = MI;
1211  MachineOperand &Src0 = MI->getOperand(
1212      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
1213  MachineOperand &Src1 = MI->getOperand(
1214      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
1215  MachineInstr *MIB = buildDefaultInstruction(
1216      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
1217  static const unsigned  Operands[14] = {
1218    AMDGPU::OpName::update_exec_mask,
1219    AMDGPU::OpName::update_pred,
1220    AMDGPU::OpName::write,
1221    AMDGPU::OpName::omod,
1222    AMDGPU::OpName::dst_rel,
1223    AMDGPU::OpName::clamp,
1224    AMDGPU::OpName::src0_neg,
1225    AMDGPU::OpName::src0_rel,
1226    AMDGPU::OpName::src0_abs,
1227    AMDGPU::OpName::src0_sel,
1228    AMDGPU::OpName::src1_neg,
1229    AMDGPU::OpName::src1_rel,
1230    AMDGPU::OpName::src1_abs,
1231    AMDGPU::OpName::src1_sel,
1232  };
1233
1234  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
1235      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
1236  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
1237      .setReg(MO.getReg());
1238
1239  for (unsigned i = 0; i < 14; i++) {
1240    MachineOperand &MO = MI->getOperand(
1241        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
1242    assert (MO.isImm());
1243    setImmOperand(MIB, Operands[i], MO.getImm());
1244  }
1245  MIB->getOperand(20).setImm(0);
1246  return MIB;
1247}
1248
1249MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
1250                                         MachineBasicBlock::iterator I,
1251                                         unsigned DstReg,
1252                                         uint64_t Imm) const {
1253  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
1254                                                  AMDGPU::ALU_LITERAL_X);
1255  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
1256  return MovImm;
1257}
1258
1259MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
1260                                       MachineBasicBlock::iterator I,
1261                                       unsigned DstReg, unsigned SrcReg) const {
1262  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
1263}
1264
1265int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
1266  return getOperandIdx(MI.getOpcode(), Op);
1267}
1268
1269int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
1270  return AMDGPU::getNamedOperandIdx(Opcode, Op);
1271}
1272
1273void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
1274                                  int64_t Imm) const {
1275  int Idx = getOperandIdx(*MI, Op);
1276  assert(Idx != -1 && "Operand not supported for this instruction.");
1277  assert(MI->getOperand(Idx).isImm());
1278  MI->getOperand(Idx).setImm(Imm);
1279}
1280
1281//===----------------------------------------------------------------------===//
1282// Instruction flag getters/setters
1283//===----------------------------------------------------------------------===//
1284
1285bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
1286  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
1287}
1288
1289MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
1290                                         unsigned Flag) const {
1291  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1292  int FlagIndex = 0;
1293  if (Flag != 0) {
1294    // If we pass something other than the default value of Flag to this
1295    // function, it means we want to set a flag on an instruction
1296    // that uses native encoding.
1297    assert(HAS_NATIVE_OPERANDS(TargetFlags));
1298    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
1299    switch (Flag) {
1300    case MO_FLAG_CLAMP:
1301      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
1302      break;
1303    case MO_FLAG_MASK:
1304      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
1305      break;
1306    case MO_FLAG_NOT_LAST:
1307    case MO_FLAG_LAST:
1308      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
1309      break;
1310    case MO_FLAG_NEG:
1311      switch (SrcIdx) {
1312      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
1313      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
1314      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
1315      }
1316      break;
1317
1318    case MO_FLAG_ABS:
1319      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
1320                       "instructions.");
1321      (void)IsOP3;
1322      switch (SrcIdx) {
1323      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
1324      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
1325      }
1326      break;
1327
1328    default:
1329      FlagIndex = -1;
1330      break;
1331    }
1332    assert(FlagIndex != -1 && "Flag not supported for this instruction");
1333  } else {
1334      FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
1335      assert(FlagIndex != 0 &&
1336         "Instruction flags not supported for this instruction");
1337  }
1338
1339  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
1340  assert(FlagOp.isImm());
1341  return FlagOp;
1342}
1343
1344void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
1345                            unsigned Flag) const {
1346  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1347  if (Flag == 0) {
1348    return;
1349  }
1350  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1351    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1352    if (Flag == MO_FLAG_NOT_LAST) {
1353      clearFlag(MI, Operand, MO_FLAG_LAST);
1354    } else if (Flag == MO_FLAG_MASK) {
1355      clearFlag(MI, Operand, Flag);
1356    } else {
1357      FlagOp.setImm(1);
1358    }
1359  } else {
1360      MachineOperand &FlagOp = getFlagOp(MI, Operand);
1361      FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
1362  }
1363}
1364
1365void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
1366                              unsigned Flag) const {
1367  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1368  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1369    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1370    FlagOp.setImm(0);
1371  } else {
1372    MachineOperand &FlagOp = getFlagOp(MI);
1373    unsigned InstFlags = FlagOp.getImm();
1374    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
1375    FlagOp.setImm(InstFlags);
1376  }
1377}
1378