125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//
325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//                     The LLVM Compiler Infrastructure
425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//
525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// This file is distributed under the University of Illinois Open Source
625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// License. See LICENSE.TXT for details.
725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//
825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----------------------------------------------------------------------===//
925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//
/// \file
/// This pass implements instruction packetization for R600. It unsets the
/// isLast bit of instructions inside a bundle and substitutes source registers
/// with PreviousVector registers when applicable.
1425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//
1525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----------------------------------------------------------------------===//
1625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
1725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/Support/Debug.h"
185c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "AMDGPU.h"
19cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "AMDGPUSubtarget.h"
205c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "R600InstrInfo.h"
2125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/DFAPacketizer.h"
2225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/MachineDominators.h"
235c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/CodeGen/MachineFunctionPass.h"
2425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/MachineLoopInfo.h"
255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/CodeGen/Passes.h"
2625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/ScheduleDAG.h"
275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/Support/raw_ostream.h"
2825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
295c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramerusing namespace llvm;
305c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer
31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "packets"
32dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
335c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace {
3425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
3525f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneclass R600Packetizer : public MachineFunctionPass {
3625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
3725f259cde28860ea76c2f5628010968945a28edbVincent Lejeunepublic:
3825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  static char ID;
3925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
4025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void getAnalysisUsage(AnalysisUsage &AU) const override {
4225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    AU.setPreservesCFG();
4325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    AU.addRequired<MachineDominatorTree>();
4425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    AU.addPreserved<MachineDominatorTree>();
4525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    AU.addRequired<MachineLoopInfo>();
4625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    AU.addPreserved<MachineLoopInfo>();
4725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MachineFunctionPass::getAnalysisUsage(AU);
4825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
4925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const char *getPassName() const override {
5125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return "R600 Packetizer";
5225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
5325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool runOnMachineFunction(MachineFunction &Fn) override;
5525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune};
5625f259cde28860ea76c2f5628010968945a28edbVincent Lejeunechar R600Packetizer::ID = 0;
5725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
5825f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneclass R600PacketizerList : public VLIWPacketizerList {
5925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
6025f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneprivate:
6125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  const R600InstrInfo *TII;
6225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  const R600RegisterInfo &TRI;
63bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune  bool VLIW5;
64bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune  bool ConsideredInstUsesAlreadyWrittenVectorElement;
6525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
6625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  unsigned getSlot(const MachineInstr *MI) const {
6725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return TRI.getHWRegChan(MI->getOperand(0).getReg());
6825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
6925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
70152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune  /// \returns register to PV chan mapping for bundle/single instructions that
7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  /// immediately precedes I.
72152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune  DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
73152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      const {
74152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune    DenseMap<unsigned, unsigned> Result;
7525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    I--;
7625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
7725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      return Result;
7825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
7925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (I->isBundle())
8025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      BI++;
81bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    int LastDstChan = -1;
82152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune    do {
83bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      bool isTrans = false;
84bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      int BISlot = getSlot(BI);
85bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      if (LastDstChan >= BISlot)
86bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune        isTrans = true;
87bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      LastDstChan = BISlot;
88152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      if (TII->isPredicated(BI))
89152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        continue;
905e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
910c922879854f5a6ee60283b99c68089f76f94778Vincent Lejeune      if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
92152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        continue;
93cedcfee405a22b245e869abe8609f094df34085aTom Stellard      int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
94cedcfee405a22b245e869abe8609f094df34085aTom Stellard      if (DstIdx == -1) {
95cedcfee405a22b245e869abe8609f094df34085aTom Stellard        continue;
96cedcfee405a22b245e869abe8609f094df34085aTom Stellard      }
97cedcfee405a22b245e869abe8609f094df34085aTom Stellard      unsigned Dst = BI->getOperand(DstIdx).getReg();
98bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      if (isTrans || TII->isTransOnly(BI)) {
998f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        Result[Dst] = AMDGPU::PS;
1008f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        continue;
1018f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      }
1024ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune      if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
1034ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune          BI->getOpcode() == AMDGPU::DOT4_eg) {
104152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        Result[Dst] = AMDGPU::PV_X;
105152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        continue;
106152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      }
107e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard      if (Dst == AMDGPU::OQAP) {
108e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard        continue;
109e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard      }
110152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      unsigned PVReg = 0;
111152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      switch (TRI.getHWRegChan(Dst)) {
112152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      case 0:
113152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        PVReg = AMDGPU::PV_X;
114152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        break;
115152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      case 1:
116152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        PVReg = AMDGPU::PV_Y;
117152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        break;
118152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      case 2:
119152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        PVReg = AMDGPU::PV_Z;
120152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        break;
121152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      case 3:
122152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        PVReg = AMDGPU::PV_W;
123152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        break;
124152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      default:
125152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        llvm_unreachable("Invalid Chan");
126152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      }
127152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      Result[Dst] = PVReg;
128152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune    } while ((++BI)->isBundledWithPred());
12925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return Result;
13025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
13125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
132152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune  void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
133152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      const {
1345e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard    unsigned Ops[] = {
1355e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard      AMDGPU::OpName::src0,
1365e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard      AMDGPU::OpName::src1,
1375e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard      AMDGPU::OpName::src2
13825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    };
13925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    for (unsigned i = 0; i < 3; i++) {
14025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
14125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      if (OperandIdx < 0)
14225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        continue;
14325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      unsigned Src = MI->getOperand(OperandIdx).getReg();
144152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
145152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune      if (It != PVs.end())
146152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune        MI->getOperand(OperandIdx).setReg(It->second);
14725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    }
14825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
14925f259cde28860ea76c2f5628010968945a28edbVincent Lejeunepublic:
15025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // Ctor.
15125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
15225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune                        MachineDominatorTree &MDT)
15325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  : VLIWPacketizerList(MF, MLI, MDT, true),
15425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
155bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    TRI(TII->getRegisterInfo()) {
156bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
157bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune  }
15825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
15925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // initPacketizerState - initialize some internal flags.
160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void initPacketizerState() override {
161bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    ConsideredInstUsesAlreadyWrittenVectorElement = false;
162bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune  }
16325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
16425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool ignorePseudoInstruction(MachineInstr *MI,
166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                               MachineBasicBlock *MBB) override {
16725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return false;
16825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
16925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
17025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // isSoloInstruction - return true if instruction MI can not be packetized
17125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // with any other instruction, which means that MI itself is a packet.
172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool isSoloInstruction(MachineInstr *MI) override {
17325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (TII->isVector(*MI))
17425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      return true;
17525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (!TII->isALUInstr(MI->getOpcode()))
17625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      return true;
177cedcfee405a22b245e869abe8609f094df34085aTom Stellard    if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
178cedcfee405a22b245e869abe8609f094df34085aTom Stellard      return true;
179a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune    // XXX: This can be removed once the packetizer properly handles all the
180a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune    // LDS instruction group restrictions.
181a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune    if (TII->isLDSInstr(MI->getOpcode()))
182a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune      return true;
18325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return false;
18425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
18525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
18625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
18725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // together.
188dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
18925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
190bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    if (getSlot(MII) == getSlot(MIJ))
191bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      ConsideredInstUsesAlreadyWrittenVectorElement = true;
19225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    // Does MII and MIJ share the same pred_sel ?
1935e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard    int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
1945e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard        OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
19525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
19625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
19725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (PredI != PredJ)
19825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      return false;
19925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    if (SUJ->isSucc(SUI)) {
20025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
20125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        const SDep &Dep = SUJ->Succs[i];
20225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        if (Dep.getSUnit() != SUI)
20325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          continue;
20425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        if (Dep.getKind() == SDep::Anti)
20525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          continue;
20625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        if (Dep.getKind() == SDep::Output)
20725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
20825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune            continue;
20925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        return false;
21025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      }
21125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    }
21204c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard
21304c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard    bool ARDef = TII->definesAddressRegister(MII) ||
21404c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard                 TII->definesAddressRegister(MIJ);
21504c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard    bool ARUse = TII->usesAddressRegister(MII) ||
21604c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard                 TII->usesAddressRegister(MIJ);
21704c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard    if (ARDef && ARUse)
21804c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard      return false;
21904c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard
22025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return true;
22125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
22225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
22325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
22425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // and SUJ.
225dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return false;
227dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
22825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
22925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
2305e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
23125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MI->getOperand(LastOp).setImm(Bit);
23225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
23325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
2348f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune  bool isBundlableWithCurrentPMI(MachineInstr *MI,
2358f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune                                 const DenseMap<unsigned, unsigned> &PV,
2368f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune                                 std::vector<R600InstrInfo::BankSwizzle> &BS,
2378f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune                                 bool &isTransSlot) {
2388f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    isTransSlot = TII->isTransOnly(MI);
239bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    assert (!isTransSlot || VLIW5);
240bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune
241bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    // Is the dst reg sequence legal ?
242bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    if (!isTransSlot && !CurrentPacketMIs.empty()) {
243bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
244bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune        if (ConsideredInstUsesAlreadyWrittenVectorElement  &&
245bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune            !TII->isVectorOnly(MI) && VLIW5) {
246bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune          isTransSlot = true;
247bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune          DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
248bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune        }
249bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune        else
250bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune          return false;
251bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      }
252bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    }
2538f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune
2548f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    // Are the Constants limitations met ?
25525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    CurrentPacketMIs.push_back(MI);
2568f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
2578f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      DEBUG(
25825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "Couldn't pack :\n";
25925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        MI->dump();
26025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "with the following packets :\n";
26125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
26225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          CurrentPacketMIs[i]->dump();
26325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          dbgs() << "\n";
26425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        }
26525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "because of Consts read limitations\n";
2668f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      );
2678f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      CurrentPacketMIs.pop_back();
2688f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      return false;
2698f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    }
2708f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune
2718f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    // Is there a BankSwizzle set that meet Read Port limitations ?
2728f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
2738f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune            PV, BS, isTransSlot)) {
2748f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      DEBUG(
27525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "Couldn't pack :\n";
27625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        MI->dump();
27725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "with the following packets :\n";
27825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
27925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          CurrentPacketMIs[i]->dump();
28025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          dbgs() << "\n";
28125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        }
28225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        dbgs() << "because of Read port limitations\n";
2838f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      );
2848f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      CurrentPacketMIs.pop_back();
2858f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      return false;
2868f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    }
2878f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune
288ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard    // We cannot read LDS source registrs from the Trans slot.
289ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard    if (isTransSlot && TII->readsLDSSrcReg(MI))
290ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard      return false;
291ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard
2928f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    CurrentPacketMIs.pop_back();
2938f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    return true;
2948f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune  }
2958f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune
296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
2978f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    MachineBasicBlock::iterator FirstInBundle =
2988f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
2998f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    const DenseMap<unsigned, unsigned> &PV =
3008f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        getPreviousVector(FirstInBundle);
3018f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    std::vector<R600InstrInfo::BankSwizzle> BS;
3028f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    bool isTransSlot;
3038f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune
3048f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
30525c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune      for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
30625c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune        MachineInstr *MI = CurrentPacketMIs[i];
3078f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        unsigned Op = TII->getOperandIdx(MI->getOpcode(),
3088f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune            AMDGPU::OpName::bank_swizzle);
3098f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        MI->getOperand(Op).setImm(BS[i]);
31025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune      }
3118f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      unsigned Op = TII->getOperandIdx(MI->getOpcode(),
3128f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune          AMDGPU::OpName::bank_swizzle);
3138f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      MI->getOperand(Op).setImm(BS.back());
3148f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      if (!CurrentPacketMIs.empty())
3158f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune        setIsLastBit(CurrentPacketMIs.back(), 0);
3168f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      substitutePV(MI, PV);
3178f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
3188f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      if (isTransSlot) {
31936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        endPacket(std::next(It)->getParent(), std::next(It));
3208f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      }
3218f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune      return It;
32225c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune    }
3238f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune    endPacket(MI->getParent(), MI);
324bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune    if (TII->isTransOnly(MI))
325bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune      return MI;
32625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    return VLIWPacketizerList::addToPacket(MI);
32725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
32825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune};
32925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
33025f259cde28860ea76c2f5628010968945a28edbVincent Lejeunebool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
33125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
33225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
33325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
33425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
33525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // Instantiate the packetizer.
33625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  R600PacketizerList Packetizer(Fn, MLI, MDT);
33725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
33825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // DFA state table should not be empty.
33925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
34025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
34125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  //
34225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // Loop over all basic blocks and remove KILL pseudo-instructions
34325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // These instructions confuse the dependence analysis. Consider:
34425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // D0 = ...   (Insn 0)
34525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // R0 = KILL R0, D0 (Insn 1)
34625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // R0 = ... (Insn 2)
34725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // Here, Insn 1 will result in the dependence graph not emitting an output
34825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // dependence between Insn 0 and Insn 2. This can lead to incorrect
34925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // packetization
35025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  //
35125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
35225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune       MBB != MBBe; ++MBB) {
35325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MachineBasicBlock::iterator End = MBB->end();
35425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    MachineBasicBlock::iterator MI = MBB->begin();
35525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    while (MI != End) {
356de28bdadff78ceea6bb05e23dc3b4cc92fa359edTom Stellard      if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
357f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune          (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
35825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        MachineBasicBlock::iterator DeleteMI = MI;
35925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        ++MI;
36025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        MBB->erase(DeleteMI);
36125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        End = MBB->end();
36225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        continue;
36325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      }
36425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      ++MI;
36525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    }
36625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
36725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
36825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  // Loop over all of the basic blocks.
36925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
37025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune       MBB != MBBe; ++MBB) {
37125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    // Find scheduling regions and schedule / packetize each region.
37225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    unsigned RemainingCount = MBB->size();
37325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
37425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        RegionEnd != MBB->begin();) {
37525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      // The next region starts above the previous region. Look backward in the
37625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      // instruction stream until we find the nearest boundary.
37725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      MachineBasicBlock::iterator I = RegionEnd;
37825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      for(;I != MBB->begin(); --I, --RemainingCount) {
37936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
38025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune          break;
38125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      }
38225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      I = MBB->begin();
38325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
38425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      // Skip empty scheduling regions.
38525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      if (I == RegionEnd) {
38636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        RegionEnd = std::prev(RegionEnd);
38725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        --RemainingCount;
38825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        continue;
38925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      }
39025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      // Skip regions with one instruction.
39136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      if (I == std::prev(RegionEnd)) {
39236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        RegionEnd = std::prev(RegionEnd);
39325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune        continue;
39425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      }
39525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
39625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
39725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune      RegionEnd = I;
39825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune    }
39925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  }
40025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
40125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  return true;
40225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
40325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune}
40425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
4055c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace
40625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune
40725f259cde28860ea76c2f5628010968945a28edbVincent Lejeunellvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
40825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune  return new R600Packetizer(tm);
40925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune}
410