R600InstrInfo.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
15a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)#include "R600InstrInfo.h"
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPU.h"
176e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)#include "AMDGPUSubtarget.h"
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUTargetMachine.h"
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "R600Defines.h"
20ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "R600MachineFunctionInfo.h"
215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "R600RegisterInfo.h"
225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "llvm/CodeGen/MachineFrameInfo.h"
23116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch#include "llvm/CodeGen/MachineInstrBuilder.h"
245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h"
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)using namespace llvm;
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#define GET_INSTRINFO_CTOR_DTOR
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUGenDFAPacketizer.inc"
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  : AMDGPUInstrInfo(st),
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    RI(st)
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  { }
35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return RI;
38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
42a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
435f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
445f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::isVector(const MachineInstr &MI) const {
455f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                           MachineBasicBlock::iterator MI, DebugLoc DL,
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                           unsigned DestReg, unsigned SrcReg,
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                           bool KillSrc) const {
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned VectorComponents = 0;
54010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
57010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)       AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VectorComponents = 4;
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)            AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
61010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)             AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    VectorComponents = 2;
64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (VectorComponents > 0) {
675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    for (unsigned I = 0; I < VectorComponents; I++) {
685f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              RI.getSubReg(DestReg, SubRegIndex),
716e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)                              RI.getSubReg(SrcReg, SubRegIndex))
722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                              .addReg(DestReg,
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                      RegState::Define | RegState::Implicit);
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
75ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  } else {
76010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                                  DestReg, SrcReg);
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                    .setIsKill(KillSrc);
80010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
837d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)/// \returns true if \p MBBI can be moved into a new basic.
841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                       MachineBasicBlock::iterator MBBI) const {
86a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                        E = MBBI->operands_end(); I != E; ++I) {
885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
90116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      return false;
912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
92a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  return true;
93a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
94a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
95a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)unsigned R600InstrInfo::getIEQOpcode() const {
96a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  return AMDGPU::SETE_INT;
97a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
98a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
99a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool R600InstrInfo::isMov(unsigned Opcode) const {
100a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
101a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
102a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  switch(Opcode) {
103a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  default: return false;
104a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  case AMDGPU::MOV:
105a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  case AMDGPU::MOV_IMM_F32:
106a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  case AMDGPU::MOV_IMM_I32:
107010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    return true;
108a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  }
109a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
110a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)
111a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)// Some instructions act as place holders to emulate operations that the GPU
112a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)// hardware does automatically. This function can be used to check if
113a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)// an opcode falls into this category.
1141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
1155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  switch (Opcode) {
116010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  default: return false;
117a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  case AMDGPU::RETURN:
1185f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return true;
1195f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
120a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}
1215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
122a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
123a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  return false;
1243551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)}
1253551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1263551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
1273551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  switch(Opcode) {
1283551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    default: return false;
1293551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    case AMDGPU::CUBE_r600_pseudo:
1303551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    case AMDGPU::CUBE_r600_real:
1313551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    case AMDGPU::CUBE_eg_pseudo:
132010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    case AMDGPU::CUBE_eg_real:
1333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)      return true;
1343551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  }
1353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)}
1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1375f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
138a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  unsigned TargetFlags = get(Opcode).TSFlags;
1395f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1405f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return (TargetFlags & R600_InstFlag::ALU_INST);
1415f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
1423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1433551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
1445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  unsigned TargetFlags = get(Opcode).TSFlags;
1455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return ((TargetFlags & R600_InstFlag::OP1) |
1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)          (TargetFlags & R600_InstFlag::OP2) |
1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)          (TargetFlags & R600_InstFlag::OP3));
1495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1506e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)
1515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
1526e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)  unsigned TargetFlags = get(Opcode).TSFlags;
1535f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1545f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return ((TargetFlags & R600_InstFlag::LDS_1A) |
1555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)          (TargetFlags & R600_InstFlag::LDS_1A1D) |
1565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)          (TargetFlags & R600_InstFlag::LDS_1A2D));
1575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
159cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
1605d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
1615d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1621320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
1645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
165a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
1665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
1685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (isALUInstr(MI->getOpcode()))
1695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return true;
1705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
1715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return true;
1725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  switch (MI->getOpcode()) {
1735f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  case AMDGPU::PRED_X:
1745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  case AMDGPU::INTERP_PAIR_XY:
175116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  case AMDGPU::INTERP_PAIR_ZW:
176c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  case AMDGPU::INTERP_VEC_LOAD:
177c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  case AMDGPU::COPY:
178ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  case AMDGPU::DOT_4:
179a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    return true;
1804e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  default:
181a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    return false;
1824e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  }
183c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
1846e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)
185c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
186c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (ST.hasCaymanISA())
187c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return false;
1885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
1896e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)}
1905f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
191c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
1927d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  return isTransOnly(MI->getOpcode());
1931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
1945f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
1958bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
1968bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
1978bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles)}
198a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
199a02191e04bc25c4935f804f2c080ae28663d096dBen Murdochbool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
200a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return isVectorOnly(MI->getOpcode());
201a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
2025f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
2035f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::isExport(unsigned Opcode) const {
2045f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
20590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)}
2065f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
207c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
208a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return ST.hasVertexCache() && IS_VTX(get(Opcode));
209a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
210a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
2115f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
2125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
2135f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
214a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch}
2155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
216a3f7b4e666c476898878fa745f637129375cd889Ben Murdochbool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
217a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
218a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch}
219a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
2205f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
2215f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
2225f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
223a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch         usesTextureCache(MI->getOpcode());
2245f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
225a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
2261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
2275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  switch (Opcode) {
228a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  case AMDGPU::KILLGT:
229a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch  case AMDGPU::GROUP_BARRIER:
230a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return true;
2315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  default:
2325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return false;
2335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
234ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch}
2355f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
236ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdochbool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
2375c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  return  MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
2385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
2395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
2415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
2425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
2435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
2455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!isALUInstr(MI->getOpcode())) {
2466e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)    return false;
2475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
2486e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
2495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                                        E = MI->operands_end(); I != E; ++I) {
250116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    if (!I->isReg() || !I->isUse() ||
251116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch        TargetRegisterInfo::isVirtualRegister(I->getReg()))
2525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      continue;
2535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
2555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      return true;
2561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
2575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return false;
2585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
2595d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2605d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
2615d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  static const unsigned OpTable[] = {
2625d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    AMDGPU::OpName::src0,
263a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    AMDGPU::OpName::src1,
264a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    AMDGPU::OpName::src2
265a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  };
2665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
2675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  assert (SrcNum < 3);
2685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return getOperandIdx(Opcode, OpTable[SrcNum]);
2695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
2705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2715c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu#define SRC_SEL_ROWS 11
2725c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liuint R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
273cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
274cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
2755d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
2765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
2775f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
2785f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
2795f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
2805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
2815f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
2825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
2835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
28490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
2852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  };
2867d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
2892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return getOperandIdx(Opcode, SrcSelTable[i][1]);
2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
29190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  }
2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return -1;
2937d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
2942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#undef SRC_SEL_ROWS
2952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)SmallVector<std::pair<MachineOperand *, int64_t>, 3>
2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)R600InstrInfo::getSrcs(MachineInstr *MI) const {
298010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
2992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
300ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  if (MI->getOpcode() == AMDGPU::DOT_4) {
30190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    static const unsigned OpTable[8][2] = {
3024e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
3032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
3042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
3052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
3062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
3072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
30890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
30990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
31090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    };
31190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
3122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    for (unsigned j = 0; j < 8; j++) {
3132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
3142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                                        OpTable[j][0]));
315010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      unsigned Reg = MO.getReg();
3162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (Reg == AMDGPU::ALU_CONST) {
3172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
318a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)                                                    OpTable[j][1])).getImm();
3192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
3202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        continue;
321010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      }
3222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
3242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return Result;
3257d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  }
3261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3275f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  static const unsigned OpTable[3][2] = {
328a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
3295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
3305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
33190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  };
33290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
3332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (unsigned j = 0; j < 3; j++) {
3342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
3352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (SrcIdx < 0)
3362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
3372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    MachineOperand &MO = MI->getOperand(SrcIdx);
338ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch    unsigned Reg = MI->getOperand(SrcIdx).getReg();
3392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Reg == AMDGPU::ALU_CONST) {
3404e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)      unsigned Sel = MI->getOperand(
3412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
3422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
3432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;
3442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
345010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (Reg == AMDGPU::ALU_LITERAL_X) {
3462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      unsigned Imm = MI->getOperand(
3472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
3482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
3492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;
3502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
3512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
3522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
3532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return Result;
3542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
355010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
3567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)std::vector<std::pair<int, unsigned> >
3571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciR600InstrInfo::ExtractSrcs(MachineInstr *MI,
3585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                           const DenseMap<unsigned, unsigned> &PV,
359a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                           unsigned &ConstCount) const {
3605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ConstCount = 0;
3615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const std::pair<int, unsigned> DummyPair(-1, 0);
3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::vector<std::pair<int, unsigned> > Result;
3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned i = 0;
3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (unsigned n = Srcs.size(); i < n; ++i) {
366116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    unsigned Reg = Srcs[i].first->getReg();
3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (Reg == AMDGPU::OQAP) {
369ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch      Result.push_back(std::pair<int, unsigned>(Index, 0));
3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
3714e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    if (PV.find(Reg) != PV.end()) {
3722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // 255 is used to tells its a PS/PV reg
3732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Result.push_back(std::pair<int, unsigned>(255, 0));
3742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      continue;
375010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    }
376010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (Index > 127) {
3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ConstCount++;
3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      Result.push_back(DummyPair);
379a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)      continue;
3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
381116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    unsigned Chan = RI.getHWRegChan(Reg);
3822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Result.push_back(std::pair<int, unsigned>(Index, Chan));
383010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
3842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (; i < 3; ++i)
3852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    Result.push_back(DummyPair);
386010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  return Result;
3877d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
3881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
3895f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)static std::vector<std::pair<int, unsigned> >
390a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)Swizzle(std::vector<std::pair<int, unsigned> > Src,
3915f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)        R600InstrInfo::BankSwizzle Swz) {
3925f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (Src[0] == Src[1])
393a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    Src[1].first = -1;
394a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  switch (Swz) {
395a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_012_SCL_210:
396a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
397a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_021_SCL_122:
398a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    std::swap(Src[1], Src[2]);
399a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
4004e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  case R600InstrInfo::ALU_VEC_102_SCL_221:
401a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    std::swap(Src[0], Src[1]);
402a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
403a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_120_SCL_212:
404010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    std::swap(Src[0], Src[1]);
405010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    std::swap(Src[0], Src[2]);
406a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
407a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_201:
408a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    std::swap(Src[0], Src[2]);
4091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    std::swap(Src[0], Src[1]);
410a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
411a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_210:
412a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    std::swap(Src[0], Src[2]);
413a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
414a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  }
415a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  return Src;
416a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)}
417010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)
418a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)static unsigned
419a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
420010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  switch (Swz) {
421a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_012_SCL_210: {
4221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    unsigned Cycles[3] = { 2, 1, 0};
4235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return Cycles[Op];
424a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
4255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  case R600InstrInfo::ALU_VEC_021_SCL_122: {
4265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    unsigned Cycles[3] = { 1, 2, 2};
427a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return Cycles[Op];
428a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
429a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_120_SCL_212: {
430a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    unsigned Cycles[3] = { 2, 1, 2};
431a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return Cycles[Op];
432a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
433a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  case R600InstrInfo::ALU_VEC_102_SCL_221: {
434a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    unsigned Cycles[3] = { 2, 2, 1};
435a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return Cycles[Op];
436a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
4376e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)  default:
438a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    llvm_unreachable("Wrong Swizzle for Trans Slot");
439a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    return 0;
440a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
441a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
442effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
443a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
444effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch/// in the same Instruction Group while meeting read port limitations given a
445effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch/// Swz swizzle sequence.
4466e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)unsigned  R600InstrInfo::isLegalUpTo(
447a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
448a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
449a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const std::vector<std::pair<int, unsigned> > &TransSrcs,
450a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    R600InstrInfo::BankSwizzle TransSwz) const {
4511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  int Vector[4][3];
4525f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  memset(Vector, -1, sizeof(Vector));
453a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
454a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const std::vector<std::pair<int, unsigned> > &Srcs =
455a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        Swizzle(IGSrcs[i], Swz[i]);
456effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch    for (unsigned j = 0; j < 3; j++) {
4575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      const std::pair<int, unsigned> &Src = Srcs[j];
4585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      if (Src.first < 0 || Src.first == 255)
459a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        continue;
460a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
461a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
462a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
463a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)            // The value from output queue A (denoted by register OQAP) can
464a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)            // only be fetched during the first cycle.
465a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)            return false;
466a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        }
467a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        // OQAP does not count towards the normal read port restrictions
468a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        continue;
469a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      }
470a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      if (Vector[Src.second][j] < 0)
471010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)        Vector[Src.second][j] = Src.first;
472010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)      if (Vector[Src.second][j] != Src.first)
473a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        return i;
474a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    }
475010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)  }
476cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  // Now check Trans Alu
477a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
478a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const std::pair<int, unsigned> &Src = TransSrcs[i];
479a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    unsigned Cycle = getTransSwizzle(TransSwz, i);
480a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    if (Src.first < 0)
481a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      continue;
482010d83a9304c5a91596085d917d248abff47903aTorne (Richard Coles)    if (Src.first == 255)
483a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      continue;
4841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (Vector[Src.second][Cycle] < 0)
4855f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      Vector[Src.second][Cycle] = Src.first;
486a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    if (Vector[Src.second][Cycle] != Src.first)
4875f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      return IGSrcs.size() - 1;
4885f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
489a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return IGSrcs.size();
490cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)}
491cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
492cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
493cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// (in lexicographic term) swizzle sequence assuming that all swizzles after
494cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// Idx can be skipped
495cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)static bool
496cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)NextPossibleSolution(
497cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
498cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    unsigned Idx) {
499cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  assert(Idx < SwzCandidate.size());
500cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  int ResetIdx = Idx;
501cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
502cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    ResetIdx --;
503cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
504cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
505cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  }
506cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  if (ResetIdx == -1)
507cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    return false;
508cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
509cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
510cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  return true;
511cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)}
512cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
513cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// Enumerate all possible Swizzle sequence to find one that can meet all
514cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// read port requirements.
515cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)bool R600InstrInfo::FindSwizzleForVectorSlot(
516cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
517cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
518cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    const std::vector<std::pair<int, unsigned> > &TransSrcs,
519cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    R600InstrInfo::BankSwizzle TransSwz) const {
520cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  unsigned ValidUpTo = 0;
521cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  do {
5221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
5235f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (ValidUpTo == IGSrcs.size())
524cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      return true;
5255f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
5265f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  return false;
527cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)}
5285f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
529cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
530f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)/// a const, and can't read a gpr at cycle 1 if they read 2 const.
531f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)static bool
532f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
533f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                  const std::vector<std::pair<int, unsigned> > &TransOps,
534f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                  unsigned ConstCount) {
535f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // TransALU can't read 3 constants
536f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (ConstCount > 2)
537f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
538f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
5396e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)    const std::pair<int, unsigned> &Src = TransOps[i];
540f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    unsigned Cycle = getTransSwizzle(TransSwz, i);
541f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (Src.first < 0)
542f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      continue;
543f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (ConstCount > 0 && Cycle == 0)
544f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return false;
545f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (ConstCount > 1 && Cycle == 1)
546f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return false;
547f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
548f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return true;
549f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
550f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
5516e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)bool
552f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
553f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                       const DenseMap<unsigned, unsigned> &PV,
554f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                       std::vector<BankSwizzle> &ValidSwizzle,
5555f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)                                       bool isLastAluTrans)
5565f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    const {
5575f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  //Todo : support shared src0 - src1 operand
5585f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
5595f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
5605f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ValidSwizzle.clear();
5615f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  unsigned ConstCount;
5625f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
5635f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
5645f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
5655f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
5665f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)        AMDGPU::OpName::bank_swizzle);
5675f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
568f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        IG[i]->getOperand(Op).getImm());
569f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
570f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  std::vector<std::pair<int, unsigned> > TransOps;
5711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!isLastAluTrans)
5725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
573f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
574f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  TransOps = IGSrcs.back();
575f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  IGSrcs.pop_back();
576f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  ValidSwizzle.pop_back();
577f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
578f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  static const R600InstrInfo::BankSwizzle TransSwz[] = {
5791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    ALU_VEC_012_SCL_210,
5805f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    ALU_VEC_021_SCL_122,
581f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    ALU_VEC_120_SCL_212,
582f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    ALU_VEC_102_SCL_221
583116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  };
5841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (unsigned i = 0; i < 4; i++) {
585116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    TransBS = TransSwz[i];
586116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    if (!isConstCompatible(TransBS, TransOps, ConstCount))
587116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      continue;
588116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
589116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch        TransBS);
5901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (Result) {
591116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      ValidSwizzle.push_back(TransBS);
592116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      return true;
593116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    }
594116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  }
595116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
596116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  return false;
597116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch}
598116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
599116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch
6001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool
6011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciR600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
6021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    const {
603116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  assert (Consts.size() <= 12 && "Too many operands in instructions group");
604116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch  unsigned Pair1 = 0, Pair2 = 0;
6051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
6065f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    unsigned ReadConstHalf = Consts[i] & 2;
6071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    unsigned ReadConstIndex = Consts[i] & (~3);
6081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
6091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (!Pair1) {
6101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      Pair1 = ReadHalfConst;
611116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      continue;
6125f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    }
6131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (Pair1 == ReadHalfConst)
6141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      continue;
6155f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    if (!Pair2) {
616116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      Pair2 = ReadHalfConst;
617116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch      continue;
618116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch    }
6191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (Pair2 != ReadHalfConst)
6201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      return false;
6211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  }
6221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return true;
6231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci}
6241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
6251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool
6261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano TucciR600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
6271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    const {
6281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  std::vector<unsigned> Consts;
6291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  SmallSet<int64_t, 4> Literals;
6301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
631    MachineInstr *MI = MIs[i];
632    if (!isALUInstr(MI->getOpcode()))
633      continue;
634
635    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
636        getSrcs(MI);
637
638    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
639      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
640      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
641        Literals.insert(Src.second);
642      if (Literals.size() > 4)
643        return false;
644      if (Src.first->getReg() == AMDGPU::ALU_CONST)
645        Consts.push_back(Src.second);
646      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
647          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
648        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
649        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
650        Consts.push_back((Index << 2) | Chan);
651      }
652    }
653  }
654  return fitsConstReadLimitations(Consts);
655}
656
657DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
658    const ScheduleDAG *DAG) const {
659  const InstrItineraryData *II = TM->getInstrItineraryData();
660  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
661}
662
663static bool
664isPredicateSetter(unsigned Opcode) {
665  switch (Opcode) {
666  case AMDGPU::PRED_X:
667    return true;
668  default:
669    return false;
670  }
671}
672
673static MachineInstr *
674findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
675                             MachineBasicBlock::iterator I) {
676  while (I != MBB.begin()) {
677    --I;
678    MachineInstr *MI = I;
679    if (isPredicateSetter(MI->getOpcode()))
680      return MI;
681  }
682
683  return nullptr;
684}
685
686static
687bool isJump(unsigned Opcode) {
688  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
689}
690
691static bool isBranch(unsigned Opcode) {
692  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
693      Opcode == AMDGPU::BRANCH_COND_f32;
694}
695
696bool
697R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
698                             MachineBasicBlock *&TBB,
699                             MachineBasicBlock *&FBB,
700                             SmallVectorImpl<MachineOperand> &Cond,
701                             bool AllowModify) const {
702  // Most of the following comes from the ARM implementation of AnalyzeBranch
703
704  // If the block has no terminators, it just falls into the block after it.
705  MachineBasicBlock::iterator I = MBB.end();
706  if (I == MBB.begin())
707    return false;
708  --I;
709  while (I->isDebugValue()) {
710    if (I == MBB.begin())
711      return false;
712    --I;
713  }
714  // AMDGPU::BRANCH* instructions are only available after isel and are not
715  // handled
716  if (isBranch(I->getOpcode()))
717    return true;
718  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
719    return false;
720  }
721
722  // Remove successive JUMP
723  while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
724      MachineBasicBlock::iterator PriorI = std::prev(I);
725      if (AllowModify)
726        I->removeFromParent();
727      I = PriorI;
728  }
729  MachineInstr *LastInst = I;
730
731  // If there is only one terminator instruction, process it.
732  unsigned LastOpc = LastInst->getOpcode();
733  if (I == MBB.begin() ||
734          !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
735    if (LastOpc == AMDGPU::JUMP) {
736      TBB = LastInst->getOperand(0).getMBB();
737      return false;
738    } else if (LastOpc == AMDGPU::JUMP_COND) {
739      MachineInstr *predSet = I;
740      while (!isPredicateSetter(predSet->getOpcode())) {
741        predSet = --I;
742      }
743      TBB = LastInst->getOperand(0).getMBB();
744      Cond.push_back(predSet->getOperand(1));
745      Cond.push_back(predSet->getOperand(2));
746      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
747      return false;
748    }
749    return true;  // Can't handle indirect branch.
750  }
751
752  // Get the instruction before it if it is a terminator.
753  MachineInstr *SecondLastInst = I;
754  unsigned SecondLastOpc = SecondLastInst->getOpcode();
755
756  // If the block ends with a B and a Bcc, handle it.
757  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
758    MachineInstr *predSet = --I;
759    while (!isPredicateSetter(predSet->getOpcode())) {
760      predSet = --I;
761    }
762    TBB = SecondLastInst->getOperand(0).getMBB();
763    FBB = LastInst->getOperand(0).getMBB();
764    Cond.push_back(predSet->getOperand(1));
765    Cond.push_back(predSet->getOperand(2));
766    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
767    return false;
768  }
769
770  // Otherwise, can't handle this.
771  return true;
772}
773
774static
775MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
776  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
777      It != E; ++It) {
778    if (It->getOpcode() == AMDGPU::CF_ALU ||
779        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
780      return std::prev(It.base());
781  }
782  return MBB.end();
783}
784
785unsigned
786R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
787                            MachineBasicBlock *TBB,
788                            MachineBasicBlock *FBB,
789                            const SmallVectorImpl<MachineOperand> &Cond,
790                            DebugLoc DL) const {
791  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
792
793  if (!FBB) {
794    if (Cond.empty()) {
795      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
796      return 1;
797    } else {
798      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
799      assert(PredSet && "No previous predicate !");
800      addFlag(PredSet, 0, MO_FLAG_PUSH);
801      PredSet->getOperand(2).setImm(Cond[1].getImm());
802
803      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
804             .addMBB(TBB)
805             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
806      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
807      if (CfAlu == MBB.end())
808        return 1;
809      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
810      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
811      return 1;
812    }
813  } else {
814    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
815    assert(PredSet && "No previous predicate !");
816    addFlag(PredSet, 0, MO_FLAG_PUSH);
817    PredSet->getOperand(2).setImm(Cond[1].getImm());
818    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
819            .addMBB(TBB)
820            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
821    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
822    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
823    if (CfAlu == MBB.end())
824      return 2;
825    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
826    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
827    return 2;
828  }
829}
830
831unsigned
832R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
833
834  // Note : we leave PRED* instructions there.
835  // They may be needed when predicating instructions.
836
837  MachineBasicBlock::iterator I = MBB.end();
838
839  if (I == MBB.begin()) {
840    return 0;
841  }
842  --I;
843  switch (I->getOpcode()) {
844  default:
845    return 0;
846  case AMDGPU::JUMP_COND: {
847    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
848    clearFlag(predSet, 0, MO_FLAG_PUSH);
849    I->eraseFromParent();
850    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
851    if (CfAlu == MBB.end())
852      break;
853    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
854    CfAlu->setDesc(get(AMDGPU::CF_ALU));
855    break;
856  }
857  case AMDGPU::JUMP:
858    I->eraseFromParent();
859    break;
860  }
861  I = MBB.end();
862
863  if (I == MBB.begin()) {
864    return 1;
865  }
866  --I;
867  switch (I->getOpcode()) {
868    // FIXME: only one case??
869  default:
870    return 1;
871  case AMDGPU::JUMP_COND: {
872    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
873    clearFlag(predSet, 0, MO_FLAG_PUSH);
874    I->eraseFromParent();
875    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
876    if (CfAlu == MBB.end())
877      break;
878    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
879    CfAlu->setDesc(get(AMDGPU::CF_ALU));
880    break;
881  }
882  case AMDGPU::JUMP:
883    I->eraseFromParent();
884    break;
885  }
886  return 2;
887}
888
889bool
890R600InstrInfo::isPredicated(const MachineInstr *MI) const {
891  int idx = MI->findFirstPredOperandIdx();
892  if (idx < 0)
893    return false;
894
895  unsigned Reg = MI->getOperand(idx).getReg();
896  switch (Reg) {
897  default: return false;
898  case AMDGPU::PRED_SEL_ONE:
899  case AMDGPU::PRED_SEL_ZERO:
900  case AMDGPU::PREDICATE_BIT:
901    return true;
902  }
903}
904
905bool
906R600InstrInfo::isPredicable(MachineInstr *MI) const {
907  // XXX: KILL* instructions can be predicated, but they must be the last
908  // instruction in a clause, so this means any instructions after them cannot
909  // be predicated.  Until we have proper support for instruction clauses in the
910  // backend, we will mark KILL* instructions as unpredicable.
911
912  if (MI->getOpcode() == AMDGPU::KILLGT) {
913    return false;
914  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
915    // If the clause start in the middle of MBB then the MBB has more
916    // than a single clause, unable to predicate several clauses.
917    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
918      return false;
919    // TODO: We don't support KC merging atm
920    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
921      return false;
922    return true;
923  } else if (isVector(*MI)) {
924    return false;
925  } else {
926    return AMDGPUInstrInfo::isPredicable(MI);
927  }
928}
929
930
931bool
932R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
933                                   unsigned NumCyles,
934                                   unsigned ExtraPredCycles,
935                                   const BranchProbability &Probability) const{
936  return true;
937}
938
939bool
940R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
941                                   unsigned NumTCycles,
942                                   unsigned ExtraTCycles,
943                                   MachineBasicBlock &FMBB,
944                                   unsigned NumFCycles,
945                                   unsigned ExtraFCycles,
946                                   const BranchProbability &Probability) const {
947  return true;
948}
949
950bool
951R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
952                                         unsigned NumCyles,
953                                         const BranchProbability &Probability)
954                                         const {
955  return true;
956}
957
958bool
959R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
960                                         MachineBasicBlock &FMBB) const {
961  return false;
962}
963
964
965bool
966R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
967  MachineOperand &MO = Cond[1];
968  switch (MO.getImm()) {
969  case OPCODE_IS_ZERO_INT:
970    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
971    break;
972  case OPCODE_IS_NOT_ZERO_INT:
973    MO.setImm(OPCODE_IS_ZERO_INT);
974    break;
975  case OPCODE_IS_ZERO:
976    MO.setImm(OPCODE_IS_NOT_ZERO);
977    break;
978  case OPCODE_IS_NOT_ZERO:
979    MO.setImm(OPCODE_IS_ZERO);
980    break;
981  default:
982    return true;
983  }
984
985  MachineOperand &MO2 = Cond[2];
986  switch (MO2.getReg()) {
987  case AMDGPU::PRED_SEL_ZERO:
988    MO2.setReg(AMDGPU::PRED_SEL_ONE);
989    break;
990  case AMDGPU::PRED_SEL_ONE:
991    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
992    break;
993  default:
994    return true;
995  }
996  return false;
997}
998
999bool
1000R600InstrInfo::DefinesPredicate(MachineInstr *MI,
1001                                std::vector<MachineOperand> &Pred) const {
1002  return isPredicateSetter(MI->getOpcode());
1003}
1004
1005
1006bool
1007R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
1008                       const SmallVectorImpl<MachineOperand> &Pred2) const {
1009  return false;
1010}
1011
1012
1013bool
1014R600InstrInfo::PredicateInstruction(MachineInstr *MI,
1015                      const SmallVectorImpl<MachineOperand> &Pred) const {
1016  int PIdx = MI->findFirstPredOperandIdx();
1017
1018  if (MI->getOpcode() == AMDGPU::CF_ALU) {
1019    MI->getOperand(8).setImm(0);
1020    return true;
1021  }
1022
1023  if (MI->getOpcode() == AMDGPU::DOT_4) {
1024    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
1025        .setReg(Pred[2].getReg());
1026    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
1027        .setReg(Pred[2].getReg());
1028    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
1029        .setReg(Pred[2].getReg());
1030    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
1031        .setReg(Pred[2].getReg());
1032    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1033    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1034    return true;
1035  }
1036
1037  if (PIdx != -1) {
1038    MachineOperand &PMO = MI->getOperand(PIdx);
1039    PMO.setReg(Pred[2].getReg());
1040    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1041    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
1042    return true;
1043  }
1044
1045  return false;
1046}
1047
1048unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
1049  return 2;
1050}
1051
1052unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
1053                                            const MachineInstr *MI,
1054                                            unsigned *PredCost) const {
1055  if (PredCost)
1056    *PredCost = 2;
1057  return 2;
1058}
1059
1060bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
1061
1062  switch(MI->getOpcode()) {
1063  default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
1064  case AMDGPU::R600_EXTRACT_ELT_V2:
1065  case AMDGPU::R600_EXTRACT_ELT_V4:
1066    buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
1067                      RI.getHWRegIndex(MI->getOperand(1).getReg()), //  Address
1068                      MI->getOperand(2).getReg(),
1069                      RI.getHWRegChan(MI->getOperand(1).getReg()));
1070    break;
1071  case AMDGPU::R600_INSERT_ELT_V2:
1072  case AMDGPU::R600_INSERT_ELT_V4:
1073    buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
1074                       RI.getHWRegIndex(MI->getOperand(1).getReg()),  // Address
1075                       MI->getOperand(3).getReg(),                    // Offset
1076                       RI.getHWRegChan(MI->getOperand(1).getReg()));  // Channel
1077    break;
1078  }
1079  MI->eraseFromParent();
1080  return true;
1081}
1082
1083void  R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
1084                                             const MachineFunction &MF) const {
1085  const AMDGPUFrameLowering *TFL =
1086    static_cast<const AMDGPUFrameLowering*>(
1087    MF.getTarget().getFrameLowering());
1088
1089  unsigned StackWidth = TFL->getStackWidth(MF);
1090  int End = getIndirectIndexEnd(MF);
1091
1092  if (End == -1)
1093    return;
1094
1095  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
1096    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
1097    Reserved.set(SuperReg);
1098    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
1099      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
1100      Reserved.set(Reg);
1101    }
1102  }
1103}
1104
1105unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
1106                                                 unsigned Channel) const {
1107  // XXX: Remove when we support a stack width > 2
1108  assert(Channel == 0);
1109  return RegIndex;
1110}
1111
1112const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
1113  return &AMDGPU::R600_TReg32_XRegClass;
1114}
1115
1116MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1117                                       MachineBasicBlock::iterator I,
1118                                       unsigned ValueReg, unsigned Address,
1119                                       unsigned OffsetReg) const {
1120  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
1121}
1122
1123MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
1124                                       MachineBasicBlock::iterator I,
1125                                       unsigned ValueReg, unsigned Address,
1126                                       unsigned OffsetReg,
1127                                       unsigned AddrChan) const {
1128  unsigned AddrReg;
1129  switch (AddrChan) {
1130    default: llvm_unreachable("Invalid Channel");
1131    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
1132    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
1133    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
1134    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
1135  }
1136  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1137                                               AMDGPU::AR_X, OffsetReg);
1138  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1139
1140  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1141                                      AddrReg, ValueReg)
1142                                      .addReg(AMDGPU::AR_X,
1143                                           RegState::Implicit | RegState::Kill);
1144  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
1145  return Mov;
1146}
1147
1148MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1149                                       MachineBasicBlock::iterator I,
1150                                       unsigned ValueReg, unsigned Address,
1151                                       unsigned OffsetReg) const {
1152  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
1153}
1154
1155MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
1156                                       MachineBasicBlock::iterator I,
1157                                       unsigned ValueReg, unsigned Address,
1158                                       unsigned OffsetReg,
1159                                       unsigned AddrChan) const {
1160  unsigned AddrReg;
1161  switch (AddrChan) {
1162    default: llvm_unreachable("Invalid Channel");
1163    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
1164    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
1165    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
1166    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
1167  }
1168  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
1169                                                       AMDGPU::AR_X,
1170                                                       OffsetReg);
1171  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
1172  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
1173                                      ValueReg,
1174                                      AddrReg)
1175                                      .addReg(AMDGPU::AR_X,
1176                                           RegState::Implicit | RegState::Kill);
1177  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);
1178
1179  return Mov;
1180}
1181
1182unsigned R600InstrInfo::getMaxAlusPerClause() const {
1183  return 115;
1184}
1185
1186MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
1187                                                  MachineBasicBlock::iterator I,
1188                                                  unsigned Opcode,
1189                                                  unsigned DstReg,
1190                                                  unsigned Src0Reg,
1191                                                  unsigned Src1Reg) const {
1192  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
1193    DstReg);           // $dst
1194
1195  if (Src1Reg) {
1196    MIB.addImm(0)     // $update_exec_mask
1197       .addImm(0);    // $update_predicate
1198  }
1199  MIB.addImm(1)        // $write
1200     .addImm(0)        // $omod
1201     .addImm(0)        // $dst_rel
1202     .addImm(0)        // $dst_clamp
1203     .addReg(Src0Reg)  // $src0
1204     .addImm(0)        // $src0_neg
1205     .addImm(0)        // $src0_rel
1206     .addImm(0)        // $src0_abs
1207     .addImm(-1);       // $src0_sel
1208
1209  if (Src1Reg) {
1210    MIB.addReg(Src1Reg) // $src1
1211       .addImm(0)       // $src1_neg
1212       .addImm(0)       // $src1_rel
1213       .addImm(0)       // $src1_abs
1214       .addImm(-1);      // $src1_sel
1215  }
1216
1217  //XXX: The r600g finalizer expects this to be 1, once we've moved the
1218  //scheduling to the backend, we can change the default to 0.
1219  MIB.addImm(1)        // $last
1220      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
1221      .addImm(0)         // $literal
1222      .addImm(0);        // $bank_swizzle
1223
1224  return MIB;
1225}
1226
1227#define OPERAND_CASE(Label) \
1228  case Label: { \
1229    static const unsigned Ops[] = \
1230    { \
1231      Label##_X, \
1232      Label##_Y, \
1233      Label##_Z, \
1234      Label##_W \
1235    }; \
1236    return Ops[Slot]; \
1237  }
1238
1239static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {
1240  switch (Op) {
1241  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
1242  OPERAND_CASE(AMDGPU::OpName::update_pred)
1243  OPERAND_CASE(AMDGPU::OpName::write)
1244  OPERAND_CASE(AMDGPU::OpName::omod)
1245  OPERAND_CASE(AMDGPU::OpName::dst_rel)
1246  OPERAND_CASE(AMDGPU::OpName::clamp)
1247  OPERAND_CASE(AMDGPU::OpName::src0)
1248  OPERAND_CASE(AMDGPU::OpName::src0_neg)
1249  OPERAND_CASE(AMDGPU::OpName::src0_rel)
1250  OPERAND_CASE(AMDGPU::OpName::src0_abs)
1251  OPERAND_CASE(AMDGPU::OpName::src0_sel)
1252  OPERAND_CASE(AMDGPU::OpName::src1)
1253  OPERAND_CASE(AMDGPU::OpName::src1_neg)
1254  OPERAND_CASE(AMDGPU::OpName::src1_rel)
1255  OPERAND_CASE(AMDGPU::OpName::src1_abs)
1256  OPERAND_CASE(AMDGPU::OpName::src1_sel)
1257  OPERAND_CASE(AMDGPU::OpName::pred_sel)
1258  default:
1259    llvm_unreachable("Wrong Operand");
1260  }
1261}
1262
1263#undef OPERAND_CASE
1264
1265MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
1266    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
1267    const {
1268  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
1269  unsigned Opcode;
1270  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
1271    Opcode = AMDGPU::DOT4_r600;
1272  else
1273    Opcode = AMDGPU::DOT4_eg;
1274  MachineBasicBlock::iterator I = MI;
1275  MachineOperand &Src0 = MI->getOperand(
1276      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
1277  MachineOperand &Src1 = MI->getOperand(
1278      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
1279  MachineInstr *MIB = buildDefaultInstruction(
1280      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
1281  static const unsigned  Operands[14] = {
1282    AMDGPU::OpName::update_exec_mask,
1283    AMDGPU::OpName::update_pred,
1284    AMDGPU::OpName::write,
1285    AMDGPU::OpName::omod,
1286    AMDGPU::OpName::dst_rel,
1287    AMDGPU::OpName::clamp,
1288    AMDGPU::OpName::src0_neg,
1289    AMDGPU::OpName::src0_rel,
1290    AMDGPU::OpName::src0_abs,
1291    AMDGPU::OpName::src0_sel,
1292    AMDGPU::OpName::src1_neg,
1293    AMDGPU::OpName::src1_rel,
1294    AMDGPU::OpName::src1_abs,
1295    AMDGPU::OpName::src1_sel,
1296  };
1297
1298  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
1299      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
1300  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
1301      .setReg(MO.getReg());
1302
1303  for (unsigned i = 0; i < 14; i++) {
1304    MachineOperand &MO = MI->getOperand(
1305        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
1306    assert (MO.isImm());
1307    setImmOperand(MIB, Operands[i], MO.getImm());
1308  }
1309  MIB->getOperand(20).setImm(0);
1310  return MIB;
1311}
1312
1313MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
1314                                         MachineBasicBlock::iterator I,
1315                                         unsigned DstReg,
1316                                         uint64_t Imm) const {
1317  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
1318                                                  AMDGPU::ALU_LITERAL_X);
1319  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
1320  return MovImm;
1321}
1322
1323MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
1324                                       MachineBasicBlock::iterator I,
1325                                       unsigned DstReg, unsigned SrcReg) const {
1326  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
1327}
1328
1329int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
1330  return getOperandIdx(MI.getOpcode(), Op);
1331}
1332
1333int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
1334  return AMDGPU::getNamedOperandIdx(Opcode, Op);
1335}
1336
1337void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
1338                                  int64_t Imm) const {
1339  int Idx = getOperandIdx(*MI, Op);
1340  assert(Idx != -1 && "Operand not supported for this instruction.");
1341  assert(MI->getOperand(Idx).isImm());
1342  MI->getOperand(Idx).setImm(Imm);
1343}
1344
1345//===----------------------------------------------------------------------===//
1346// Instruction flag getters/setters
1347//===----------------------------------------------------------------------===//
1348
1349bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
1350  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
1351}
1352
1353MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
1354                                         unsigned Flag) const {
1355  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1356  int FlagIndex = 0;
1357  if (Flag != 0) {
1358    // If we pass something other than the default value of Flag to this
1359    // function, it means we are want to set a flag on an instruction
1360    // that uses native encoding.
1361    assert(HAS_NATIVE_OPERANDS(TargetFlags));
1362    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
1363    switch (Flag) {
1364    case MO_FLAG_CLAMP:
1365      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
1366      break;
1367    case MO_FLAG_MASK:
1368      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
1369      break;
1370    case MO_FLAG_NOT_LAST:
1371    case MO_FLAG_LAST:
1372      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
1373      break;
1374    case MO_FLAG_NEG:
1375      switch (SrcIdx) {
1376      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
1377      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
1378      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
1379      }
1380      break;
1381
1382    case MO_FLAG_ABS:
1383      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
1384                       "instructions.");
1385      (void)IsOP3;
1386      switch (SrcIdx) {
1387      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
1388      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
1389      }
1390      break;
1391
1392    default:
1393      FlagIndex = -1;
1394      break;
1395    }
1396    assert(FlagIndex != -1 && "Flag not supported for this instruction");
1397  } else {
1398      FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
1399      assert(FlagIndex != 0 &&
1400         "Instruction flags not supported for this instruction");
1401  }
1402
1403  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
1404  assert(FlagOp.isImm());
1405  return FlagOp;
1406}
1407
1408void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
1409                            unsigned Flag) const {
1410  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1411  if (Flag == 0) {
1412    return;
1413  }
1414  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1415    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1416    if (Flag == MO_FLAG_NOT_LAST) {
1417      clearFlag(MI, Operand, MO_FLAG_LAST);
1418    } else if (Flag == MO_FLAG_MASK) {
1419      clearFlag(MI, Operand, Flag);
1420    } else {
1421      FlagOp.setImm(1);
1422    }
1423  } else {
1424      MachineOperand &FlagOp = getFlagOp(MI, Operand);
1425      FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
1426  }
1427}
1428
1429void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
1430                              unsigned Flag) const {
1431  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
1432  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
1433    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
1434    FlagOp.setImm(0);
1435  } else {
1436    MachineOperand &FlagOp = getFlagOp(MI);
1437    unsigned InstFlags = FlagOp.getImm();
1438    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
1439    FlagOp.setImm(InstFlags);
1440  }
1441}
1442