//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

#define GET_INSTRINFO_CTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm),
    ST(tm.getSubtarget<AMDGPUSubtarget>())
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  unsigned VectorComponents = 0;
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    VectorComponents = 4;
  } else if (AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
             AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
                              .addReg(DestReg,
                                      RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
                                    .setIsKill(KillSrc);
  }
}
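
// Editorial sketch (not part of the original source): a 128-bit copy expands
// into one MOV per 32-bit channel, each carrying an implicit def of the
// destination super-register so liveness stays correct. Assuming the usual
// R600 register names, copyPhysReg(..., T1_XYZW, T0_XYZW, ...) conceptually
// emits:
//
//   MOV T1.X, T0.X   ; implicit-def T1_XYZW
//   MOV T1.Y, T0.Y   ; implicit-def T1_XYZW
//   MOV T1.Z, T0.Z   ; implicit-def T1_XYZW
//   MOV T1.W, T0.W   ; implicit-def T1_XYZW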

MachineInstr *R600InstrInfo::getMovImmInstr(MachineFunction *MF,
                                            unsigned DstReg, int64_t Imm) const {
  MachineInstr *MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
  MachineInstrBuilder MIB(*MF, MI);
  MIB.addReg(DstReg, RegState::Define);
  MIB.addReg(AMDGPU::ALU_LITERAL_X);
  MIB.addImm(Imm);
  MIB.addReg(0); // PREDICATE_BIT

  return MI;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware performs automatically. This function can be used to check if
// an opcode falls into this category.
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
    default: return false;
    case AMDGPU::CUBE_r600_pseudo:
    case AMDGPU::CUBE_r600_real:
    case AMDGPU::CUBE_eg_pseudo:
    case AMDGPU::CUBE_eg_real:
      return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;

  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D));
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MO.getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to tell it's a PS/PV reg
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}
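
// Editorial example (register names assumed for illustration): for an ALU
// instruction reading T1.Y, a PV operand and KC0[2].Z, the result would be
// {(1, 1), (255, 0), (-1, 0)} with ConstCount == 1 -- one (bank index,
// channel) pair per slot, where 255 marks a PS/PV operand and the dummy
// (-1, 0) stands in for constants and missing sources.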

static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}
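
// Worked example (editorial note): starting from Src = {s0, s1, s2},
// ALU_VEC_021_SCL_122 swaps elements 1 and 2 and yields {s0, s2, s1}, while
// ALU_VEC_201 applies swap(0, 2) then swap(0, 1) and yields {s1, s2, s0}.
// Each BankSwizzle value thus encodes one fixed permutation of the three
// source operands.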

static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return 0;
        }
        // OQAP does not count towards the normal read port restrictions
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check Trans Alu
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}
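
// Worked example (editorial): Vector[Chan][ReadPort] records which GPR index
// owns each bank read port. If two packed MIs both read channel X in cycle 0
// but from different indices (say T0.X and T4.X, names assumed), the second
// assignment finds Vector[0][0] already claimed and the function returns that
// MI's position, i.e. how many leading MIs remain legal under the candidate
// swizzle.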

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic order) swizzle sequence assuming that all swizzles after
/// Idx can be skipped
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    --ResetIdx;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}
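
// Worked example (editorial, assuming the enumerator order declared in
// R600InstrInfo.h, with ALU_VEC_012_SCL_210 first and ALU_VEC_210 last): the
// candidate vector behaves like an odometer. With SwzCandidate =
// {ALU_VEC_012_SCL_210, ALU_VEC_210} and Idx = 1, the last digit is already
// maximal, so it resets and the previous digit is bumped, giving
// {ALU_VEC_021_SCL_122, ALU_VEC_012_SCL_210}. Once every digit from Idx down
// to index 0 is maximal, the search space is exhausted and false is returned.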

/// Enumerate all possible swizzle sequences to find one that meets all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a GPR at cycle 0 if they also
/// read a constant, and can't read a GPR at cycle 1 if they read two
/// constants.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}
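
// Editorial example: with ConstCount == 2, cycles 0 and 1 are both off-limits
// for GPR reads, so only ALU_VEC_102_SCL_221 -- which maps operands 0 and 1 to
// cycle 2 -- can accommodate a trans instruction whose GPR operand is src0 or
// src1; a GPR in src2 (cycle 1) would still be rejected.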

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0/src1 operands

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
        AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
        IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
        TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}
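
// Worked example (editorial): each entry encodes (Index << 2) | Chan, and
// ReadHalfConst keeps the index plus bit 1 of the channel, identifying a
// 64-bit half (XY or ZW) of a 128-bit constant fetch. For Consts = {4, 5, 12}
// the keys are 4, 4 and 12: the first two share Pair1, the third claims
// Pair2, and the group fits. A further const with key 8 would need a third
// distinct half and make the check fail.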

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, int64_t> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
          !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a JUMP_COND followed by a JUMP, handle it.
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
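
// Editorial sketch of the two block shapes this AnalyzeBranch accepts, in
// assumed MIR-like syntax:
//
//   bb.0:                          bb.0:
//     PRED_X ...                     PRED_X ...
//     JUMP_COND %bb.1, ...           JUMP_COND %bb.1, ...
//   ; falls through to bb.2          JUMP %bb.2
//
// The left form yields TBB = bb.1 plus a condition and no FBB; the right form
// additionally yields FBB = bb.2. Any other terminator arrangement makes the
// function return true, i.e. "cannot analyze".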

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  }
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
      It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return llvm::prior(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate!");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate!");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {

  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in
  // the backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB, then the MBB contains
    // more than one clause; we are unable to predicate multiple clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         const BranchProbability &Probability)
                                         const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}
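
// Editorial example: a condition {pred, OPCODE_IS_ZERO_INT, PRED_SEL_ONE}
// becomes {pred, OPCODE_IS_NOT_ZERO_INT, PRED_SEL_ZERO}: the comparison
// opcode in Cond[1] and the predicate-select register in Cond[2] are both
// inverted, and any opcode or register outside the handled cases makes the
// function report failure by returning true.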

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                       const SmallVectorImpl<MachineOperand> &Pred2) const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = 0;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    Offset = std::max(Offset,
                      GET_REG_INDEX(RI.getEncodingValue(LI->first)));
  }

  return Offset + 1;
}

int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);

  return getIndirectIndexBegin(MF) + Offset;
}

std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
                 static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
  std::vector<unsigned> Regs;

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1) {
    return Regs;
  }

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Regs.push_back(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg =
          AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Regs.push_back(Reg);
    }
  }
  return Regs;
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrStoreRegClass(
                                                     unsigned SourceReg) const {
  return &AMDGPU::R600_TReg32RegClass;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
  return &AMDGPU::TRegMemRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}
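
// Editorial sketch (assembly spelling assumed): an indirect write expands to
//
//   MOVA_INT AR.X, OffsetReg     ; set the address register, result write off
//   MOV      Addr[AR.X], Value   ; relative store (dst_rel = 1), kills AR.X
//
// while an indirect read mirrors the pattern with src0_rel = 1, so the MOV
// source rather than its destination is relatively addressed.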

const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
  return &AMDGPU::IndirectRegRegClass;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder
R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned Opcode,
                                       unsigned DstReg,
                                       unsigned Src0Reg,
                                       unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);            // $dst

  if (Src1Reg) {
    MIB.addImm(0)       // $update_exec_mask
       .addImm(0);      // $update_predicate
  }
  MIB.addImm(1)         // $write
     .addImm(0)         // $omod
     .addImm(0)         // $dst_rel
     .addImm(0)         // $dst_clamp
     .addReg(Src0Reg)   // $src0
     .addImm(0)         // $src0_neg
     .addImm(0)         // $src0_rel
     .addImm(0)         // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                    // $last
     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
     .addImm(0)                    // $literal
     .addImm(0);                   // $bank_swizzle

  return MIB;
}
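
// Minimal usage sketch (editorial): helpers in this file wrap this builder and
// then override only the operands that differ from the defaults, e.g.
//
//   MachineInstr *Mov = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
//                                               AMDGPU::ALU_LITERAL_X);
//   setImmOperand(Mov, AMDGPU::OpName::literal, Imm);
//
// which is essentially the body of buildMovImm further below.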

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}