125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// 325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// The LLVM Compiler Infrastructure 425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// 525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// This file is distributed under the University of Illinois Open Source 625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// License. See LICENSE.TXT for details. 725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// 825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----------------------------------------------------------------------===// 925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// 1025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune/// \file 1125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune/// This pass implements instructions packetization for R600. It unsets isLast 1225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune/// bit of instructions inside a bundle and substitutes src register with 1325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune/// PreviousVector when applicable. 1425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune// 1525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune//===----------------------------------------------------------------------===// 1625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 1725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/Support/Debug.h" 185c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "AMDGPU.h" 19c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines#include "AMDGPUSubtarget.h" 205c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "R600InstrInfo.h" 2125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/DFAPacketizer.h" 2225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/MachineDominators.h" 235c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/CodeGen/MachineFunctionPass.h" 2425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/MachineLoopInfo.h" 255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/CodeGen/Passes.h" 2625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune#include "llvm/CodeGen/ScheduleDAG.h" 275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/Support/raw_ostream.h" 2825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 295c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramerusing namespace llvm; 305c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer 31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#define DEBUG_TYPE "packets" 32dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 335c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace { 3425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 3525f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneclass R600Packetizer : public MachineFunctionPass { 3625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 3725f259cde28860ea76c2f5628010968945a28edbVincent Lejeunepublic: 3825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune static char ID; 3925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} 4025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void getAnalysisUsage(AnalysisUsage &AU) const override { 4225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune AU.setPreservesCFG(); 4325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune AU.addRequired<MachineDominatorTree>(); 4425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune AU.addPreserved<MachineDominatorTree>(); 4525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune AU.addRequired<MachineLoopInfo>(); 4625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune AU.addPreserved<MachineLoopInfo>(); 4725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineFunctionPass::getAnalysisUsage(AU); 4825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 4925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const char *getPassName() const override { 5125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return "R600 Packetizer"; 5225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 5325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool runOnMachineFunction(MachineFunction &Fn) override; 5525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune}; 5625f259cde28860ea76c2f5628010968945a28edbVincent Lejeunechar R600Packetizer::ID = 0; 5725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 5825f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneclass R600PacketizerList : public VLIWPacketizerList { 5925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 6025f259cde28860ea76c2f5628010968945a28edbVincent Lejeuneprivate: 6125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune const R600InstrInfo *TII; 6225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune const R600RegisterInfo &TRI; 63bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune bool VLIW5; 64bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune bool ConsideredInstUsesAlreadyWrittenVectorElement; 6525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 6625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune unsigned getSlot(const MachineInstr *MI) const { 6725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return TRI.getHWRegChan(MI->getOperand(0).getReg()); 6825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 6925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 70152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune /// \returns register to PV chan mapping for bundle/single instructions that 7136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines /// immediately precedes I. 72152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 73152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune const { 74152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune DenseMap<unsigned, unsigned> Result; 7525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune I--; 7625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 7725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return Result; 7825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 7925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (I->isBundle()) 8025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune BI++; 81bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune int LastDstChan = -1; 82152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune do { 83bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune bool isTrans = false; 84bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune int BISlot = getSlot(BI); 85bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (LastDstChan >= BISlot) 86bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune isTrans = true; 87bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune LastDstChan = BISlot; 88152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune if (TII->isPredicated(BI)) 89152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune continue; 905e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); 910c922879854f5a6ee60283b99c68089f76f94778Vincent Lejeune if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 92152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune continue; 93cedcfee405a22b245e869abe8609f094df34085aTom Stellard int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst); 94cedcfee405a22b245e869abe8609f094df34085aTom Stellard if (DstIdx == -1) { 95cedcfee405a22b245e869abe8609f094df34085aTom Stellard continue; 96cedcfee405a22b245e869abe8609f094df34085aTom Stellard } 97cedcfee405a22b245e869abe8609f094df34085aTom Stellard unsigned Dst = BI->getOperand(DstIdx).getReg(); 98bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (isTrans || TII->isTransOnly(BI)) { 998f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune Result[Dst] = AMDGPU::PS; 1008f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune continue; 1018f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune } 1024ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune if (BI->getOpcode() == AMDGPU::DOT4_r600 || 1034ed9917147b1d1f2616f7c941bbe6999b979f510Vincent Lejeune BI->getOpcode() == AMDGPU::DOT4_eg) { 104152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune Result[Dst] = AMDGPU::PV_X; 105152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune continue; 106152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune } 107e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard if (Dst == AMDGPU::OQAP) { 108e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard continue; 109e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard } 110152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune unsigned PVReg = 0; 111152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune switch (TRI.getHWRegChan(Dst)) { 112152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune case 0: 113152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune PVReg = AMDGPU::PV_X; 114152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune break; 115152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune case 1: 116152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune PVReg = AMDGPU::PV_Y; 117152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune break; 118152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune case 2: 119152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune PVReg = AMDGPU::PV_Z; 120152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune break; 121152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune case 3: 122152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune PVReg = AMDGPU::PV_W; 123152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune break; 124152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune default: 125152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune llvm_unreachable("Invalid Chan"); 126152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune } 127152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune Result[Dst] = PVReg; 128152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune } while ((++BI)->isBundledWithPred()); 12925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return Result; 13025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 13125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 132152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs) 133152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune const { 1345e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard unsigned Ops[] = { 1355e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard AMDGPU::OpName::src0, 1365e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard AMDGPU::OpName::src1, 1375e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard AMDGPU::OpName::src2 13825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune }; 13925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (unsigned i = 0; i < 3; i++) { 14025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]); 14125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (OperandIdx < 0) 14225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 14325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune unsigned Src = MI->getOperand(OperandIdx).getReg(); 144152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 145152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune if (It != PVs.end()) 146152ebee8f3e8506dbf693ecdb2d8ab335eeae4d9Vincent Lejeune MI->getOperand(OperandIdx).setReg(It->second); 14725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 14825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 14925f259cde28860ea76c2f5628010968945a28edbVincent Lejeunepublic: 15025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Ctor. 15137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI) 15237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines : VLIWPacketizerList(MF, MLI, true), 15337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TII(static_cast<const R600InstrInfo *>( 15437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MF.getSubtarget().getInstrInfo())), 15537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TRI(TII->getRegisterInfo()) { 156ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines VLIW5 = !MF.getSubtarget<AMDGPUSubtarget>().hasCaymanISA(); 157bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune } 15825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 15925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // initPacketizerState - initialize some internal flags. 160dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void initPacketizerState() override { 161bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune ConsideredInstUsesAlreadyWrittenVectorElement = false; 162bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune } 16325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 16425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 165dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool ignorePseudoInstruction(MachineInstr *MI, 166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock *MBB) override { 16725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return false; 16825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 16925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 17025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // isSoloInstruction - return true if instruction MI can not be packetized 17125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // with any other instruction, which means that MI itself is a packet. 172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool isSoloInstruction(MachineInstr *MI) override { 17325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (TII->isVector(*MI)) 17425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return true; 17525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (!TII->isALUInstr(MI->getOpcode())) 17625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return true; 177cedcfee405a22b245e869abe8609f094df34085aTom Stellard if (MI->getOpcode() == AMDGPU::GROUP_BARRIER) 178cedcfee405a22b245e869abe8609f094df34085aTom Stellard return true; 179a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune // XXX: This can be removed once the packetizer properly handles all the 180a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune // LDS instruction group restrictions. 181a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune if (TII->isLDSInstr(MI->getOpcode())) 182a92f8ee2f3ee12d26f6ed0720c763021cfa22ca8Vincent Lejeune return true; 18325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return false; 18425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 18525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 18625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 18725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // together. 188dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 18925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 190bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (getSlot(MII) == getSlot(MIJ)) 191bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune ConsideredInstUsesAlreadyWrittenVectorElement = true; 19225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Does MII and MIJ share the same pred_sel ? 1935e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), 1945e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); 19525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0, 19625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0; 19725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (PredI != PredJ) 19825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return false; 19925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (SUJ->isSucc(SUI)) { 20025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 20125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune const SDep &Dep = SUJ->Succs[i]; 20225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (Dep.getSUnit() != SUI) 20325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 20425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (Dep.getKind() == SDep::Anti) 20525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 20625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (Dep.getKind() == SDep::Output) 20725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 20825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 20925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return false; 21025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 21125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 21204c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard 21304c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard bool ARDef = TII->definesAddressRegister(MII) || 21404c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard TII->definesAddressRegister(MIJ); 21504c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard bool ARUse = TII->usesAddressRegister(MII) || 21604c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard TII->usesAddressRegister(MIJ); 21704c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard if (ARDef && ARUse) 21804c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard return false; 21904c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard 22025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return true; 22125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 22225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 22325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 22425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // and SUJ. 225dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 227dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 22825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 22925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 2305e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); 23125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MI->getOperand(LastOp).setImm(Bit); 23225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 23325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 2348f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune bool isBundlableWithCurrentPMI(MachineInstr *MI, 2358f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune const DenseMap<unsigned, unsigned> &PV, 2368f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune std::vector<R600InstrInfo::BankSwizzle> &BS, 2378f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune bool &isTransSlot) { 2388f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune isTransSlot = TII->isTransOnly(MI); 239bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune assert (!isTransSlot || VLIW5); 240bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune 241bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune // Is the dst reg sequence legal ? 242bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (!isTransSlot && !CurrentPacketMIs.empty()) { 243bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) { 244bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (ConsideredInstUsesAlreadyWrittenVectorElement && 245bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune !TII->isVectorOnly(MI) && VLIW5) { 246bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune isTransSlot = true; 247bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump();); 248bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune } 249bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune else 250bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune return false; 251bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune } 252bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune } 2538f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune 2548f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune // Are the Constants limitations met ? 25525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune CurrentPacketMIs.push_back(MI); 2568f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 2578f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune DEBUG( 25825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "Couldn't pack :\n"; 25925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MI->dump(); 26025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "with the following packets :\n"; 26125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 26225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune CurrentPacketMIs[i]->dump(); 26325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "\n"; 26425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 26525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "because of Consts read limitations\n"; 2668f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune ); 2678f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune CurrentPacketMIs.pop_back(); 2688f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune return false; 2698f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune } 2708f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune 2718f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune // Is there a BankSwizzle set that meet Read Port limitations ? 2728f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 2738f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune PV, BS, isTransSlot)) { 2748f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune DEBUG( 27525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "Couldn't pack :\n"; 27625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MI->dump(); 27725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "with the following packets :\n"; 27825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 27925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune CurrentPacketMIs[i]->dump(); 28025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "\n"; 28125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 28225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune dbgs() << "because of Read port limitations\n"; 2838f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune ); 2848f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune CurrentPacketMIs.pop_back(); 2858f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune return false; 2868f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune } 2878f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune 288ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard // We cannot read LDS source registrs from the Trans slot. 289ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard if (isTransSlot && TII->readsLDSSrcReg(MI)) 290ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard return false; 291ac779b8494ad3d2f2ea40cb566552c0fb1b17363Tom Stellard 2928f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune CurrentPacketMIs.pop_back(); 2938f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune return true; 2948f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune } 2958f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune 296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override { 2978f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune MachineBasicBlock::iterator FirstInBundle = 2988f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); 2998f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune const DenseMap<unsigned, unsigned> &PV = 3008f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune getPreviousVector(FirstInBundle); 3018f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune std::vector<R600InstrInfo::BankSwizzle> BS; 3028f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune bool isTransSlot; 3038f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune 3048f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 30525c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 30625c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune MachineInstr *MI = CurrentPacketMIs[i]; 3078f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune unsigned Op = TII->getOperandIdx(MI->getOpcode(), 3088f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune AMDGPU::OpName::bank_swizzle); 3098f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune MI->getOperand(Op).setImm(BS[i]); 31025c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune } 3118f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune unsigned Op = TII->getOperandIdx(MI->getOpcode(), 3128f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune AMDGPU::OpName::bank_swizzle); 3138f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune MI->getOperand(Op).setImm(BS.back()); 3148f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune if (!CurrentPacketMIs.empty()) 3158f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune setIsLastBit(CurrentPacketMIs.back(), 0); 3168f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune substitutePV(MI, PV); 3178f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 3188f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune if (isTransSlot) { 31936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines endPacket(std::next(It)->getParent(), std::next(It)); 3208f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune } 3218f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune return It; 32225c209e9a262b623deca60fb6b886907e22c941bVincent Lejeune } 3238f9fbd67c3f803f7397843fdf4b2a7b7ca10189eVincent Lejeune endPacket(MI->getParent(), MI); 324bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune if (TII->isTransOnly(MI)) 325bb25a01d232257b134f1f6a5810116cbb04b95b1Vincent Lejeune return MI; 32625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return VLIWPacketizerList::addToPacket(MI); 32725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 32825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune}; 32925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 33025f259cde28860ea76c2f5628010968945a28edbVincent Lejeunebool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 33137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); 33225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 33325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 33425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Instantiate the packetizer. 33537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines R600PacketizerList Packetizer(Fn, MLI); 33625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 33725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // DFA state table should not be empty. 33825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 33925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 34025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // 34125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Loop over all basic blocks and remove KILL pseudo-instructions 34225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // These instructions confuse the dependence analysis. Consider: 34325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // D0 = ... (Insn 0) 34425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // R0 = KILL R0, D0 (Insn 1) 34525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // R0 = ... (Insn 2) 34625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Here, Insn 1 will result in the dependence graph not emitting an output 34725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // dependence between Insn 0 and Insn 2. This can lead to incorrect 34825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // packetization 34925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // 35025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 35125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MBB != MBBe; ++MBB) { 35225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineBasicBlock::iterator End = MBB->end(); 35325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineBasicBlock::iterator MI = MBB->begin(); 35425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune while (MI != End) { 355de28bdadff78ceea6bb05e23dc3b4cc92fa359edTom Stellard if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF || 356f2cfef8172fd2eceb036b8caff50623a189ba2ffVincent Lejeune (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) { 35725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineBasicBlock::iterator DeleteMI = MI; 35825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune ++MI; 35925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MBB->erase(DeleteMI); 36025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune End = MBB->end(); 36125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 36225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 36325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune ++MI; 36425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 36525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 36625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 36725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Loop over all of the basic blocks. 36825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 36925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MBB != MBBe; ++MBB) { 37025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Find scheduling regions and schedule / packetize each region. 37125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune unsigned RemainingCount = MBB->size(); 37225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 37325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune RegionEnd != MBB->begin();) { 37425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // The next region starts above the previous region. Look backward in the 37525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // instruction stream until we find the nearest boundary. 37625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune MachineBasicBlock::iterator I = RegionEnd; 37725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune for(;I != MBB->begin(); --I, --RemainingCount) { 37836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn)) 37925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune break; 38025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 38125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune I = MBB->begin(); 38225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 38325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Skip empty scheduling regions. 38425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune if (I == RegionEnd) { 38536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines RegionEnd = std::prev(RegionEnd); 38625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune --RemainingCount; 38725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 38825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 38925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune // Skip regions with one instruction. 39036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (I == std::prev(RegionEnd)) { 39136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines RegionEnd = std::prev(RegionEnd); 39225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune continue; 39325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 39425f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 39525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune Packetizer.PacketizeMIs(MBB, I, RegionEnd); 39625f259cde28860ea76c2f5628010968945a28edbVincent Lejeune RegionEnd = I; 39725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 39825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune } 39925f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 40025f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return true; 40125f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 40225f259cde28860ea76c2f5628010968945a28edbVincent Lejeune} 40325f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 4045c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace 40525f259cde28860ea76c2f5628010968945a28edbVincent Lejeune 40625f259cde28860ea76c2f5628010968945a28edbVincent Lejeunellvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) { 40725f259cde28860ea76c2f5628010968945a28edbVincent Lejeune return new R600Packetizer(tm); 40825f259cde28860ea76c2f5628010968945a28edbVincent Lejeune} 409