// AMDGPUAsmPrinter.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// The AMDGPUAsmPrinter is used to print both assembly string and also binary 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// code. When passed an MCAsmStreamer it prints assembly and when passed 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// an MCObjectStreamer it outputs binary code. 
15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUAsmPrinter.h" 21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h" 22f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard#include "R600Defines.h" 232a74639bc7713146b1182328892807c421c84265Vincent Lejeune#include "R600MachineFunctionInfo.h" 24141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune#include "R600RegisterInfo.h" 255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIDefines.h" 265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIMachineFunctionInfo.h" 275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIRegisterInfo.h" 28bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCContext.h" 29bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCSectionELF.h" 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/MC/MCStreamer.h" 31bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/Support/ELF.h" 32e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard#include "llvm/Support/MathExtras.h" 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/Support/TargetRegistry.h" 3458a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "llvm/Target/TargetLoweringObjectFile.h" 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardstatic AsmPrinter 
*createAMDGPUAsmPrinterPass(TargetMachine &tm, 40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MCStreamer &Streamer) { 41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new AMDGPUAsmPrinter(tm, Streamer); 42f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 44f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardextern "C" void LLVMInitializeR600AsmPrinter() { 45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); 46f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 48f9318673178309288f9320efe02d529419ac32a2Tom StellardAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) 4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines : AsmPrinter(TM, Streamer) { 5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode(); 51f9318673178309288f9320efe02d529419ac32a2Tom Stellard} 52f9318673178309288f9320efe02d529419ac32a2Tom Stellard 53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard SetupMachineFunction(MF); 5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':')); 57141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 58f9318673178309288f9320efe02d529419ac32a2Tom Stellard MCContext &Context = getObjFileLowering().getContext(); 59f9318673178309288f9320efe02d529419ac32a2Tom Stellard const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", 6087cba4a4c1d5b8b026c83b0916b37255600ecd5fTom Stellard ELF::SHT_PROGBITS, 0, 61141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune SectionKind::getReadOnly()); 
62141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune OutStreamer.SwitchSection(ConfigSection); 6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 64f9318673178309288f9320efe02d529419ac32a2Tom Stellard const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SIProgramInfo KernelInfo; 663ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR); 6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EmitProgramInfoSI(MF, KernelInfo); 69141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } else { 70141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune EmitProgramInfoR600(MF); 71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 72f9318673178309288f9320efe02d529419ac32a2Tom Stellard 73f9318673178309288f9320efe02d529419ac32a2Tom Stellard DisasmLines.clear(); 74f9318673178309288f9320efe02d529419ac32a2Tom Stellard HexLines.clear(); 75f9318673178309288f9320efe02d529419ac32a2Tom Stellard DisasmLineMaxLen = 0; 76f9318673178309288f9320efe02d529419ac32a2Tom Stellard 77bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); 78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard EmitFunctionBody(); 79f9318673178309288f9320efe02d529419ac32a2Tom Stellard 8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isVerbose()) { 8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MCSectionELF *CommentSection 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines = Context.getELFSection(".AMDGPU.csdata", 8336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ELF::SHT_PROGBITS, 0, 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SectionKind::getReadOnly()); 8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 
OutStreamer.SwitchSection(CommentSection); 8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" Kernel info:", false); 8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), 9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines false); 9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), 9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines false); 9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment( 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize))); 9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 100f9318673178309288f9320efe02d529419ac32a2Tom Stellard if (STM.dumpCode()) { 101f9318673178309288f9320efe02d529419ac32a2Tom Stellard#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 102f9318673178309288f9320efe02d529419ac32a2Tom Stellard MF.dump(); 103f9318673178309288f9320efe02d529419ac32a2Tom Stellard#endif 104f9318673178309288f9320efe02d529419ac32a2Tom Stellard 105f9318673178309288f9320efe02d529419ac32a2Tom Stellard if (DisasmEnabled) { 106f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm", 107f9318673178309288f9320efe02d529419ac32a2Tom Stellard ELF::SHT_NOTE, 0, 108f9318673178309288f9320efe02d529419ac32a2Tom Stellard 
SectionKind::getReadOnly())); 109f9318673178309288f9320efe02d529419ac32a2Tom Stellard 110f9318673178309288f9320efe02d529419ac32a2Tom Stellard for (size_t i = 0; i < DisasmLines.size(); ++i) { 111f9318673178309288f9320efe02d529419ac32a2Tom Stellard std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); 112f9318673178309288f9320efe02d529419ac32a2Tom Stellard Comment += " ; " + HexLines[i] + "\n"; 113f9318673178309288f9320efe02d529419ac32a2Tom Stellard 114f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.EmitBytes(StringRef(DisasmLines[i])); 115f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.EmitBytes(StringRef(Comment)); 116f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 117f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 118f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 119f9318673178309288f9320efe02d529419ac32a2Tom Stellard 120f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 122f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 123141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeunevoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { 124141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned MaxGPR = 0; 12586cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune bool killPixel = false; 126141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune const R600RegisterInfo * RI = 127141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); 1282a74639bc7713146b1182328892807c421c84265Vincent Lejeune R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 129f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 130141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 131141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (MachineFunction::iterator BB = MF.begin(), 
BB_E = MF.end(); 132141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune BB != BB_E; ++BB) { 133141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineBasicBlock &MBB = *BB; 134141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 135141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune I != E; ++I) { 136141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineInstr &MI = *I; 13786cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune if (MI.getOpcode() == AMDGPU::KILLGT) 13886cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune killPixel = true; 139141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned numOperands = MI.getNumOperands(); 140141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 141141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineOperand & MO = MI.getOperand(op_idx); 142141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune if (!MO.isReg()) 143141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune continue; 144141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; 145141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 146141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune // Register with value > 127 aren't GPR 147141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune if (HWReg > 127) 148141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune continue; 149141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MaxGPR = std::max(MaxGPR, HWReg); 150141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 151141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 152141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 153f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard 154f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard unsigned RsrcReg; 1553ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard if 
(STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { 156f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard // Evergreen / Northern Islands 157f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard switch (MFI->ShaderType) { 158f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard default: // Fall through 159f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; 160f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; 161f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; 162f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; 163f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 164f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } else { 165f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard // R600 / R700 166f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard switch (MFI->ShaderType) { 167f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard default: // Fall through 168f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::GEOMETRY: // Fall through 169f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::COMPUTE: // Fall through 170f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; 171f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; 172f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 173f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 174f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard 175f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(RsrcReg, 4); 176f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) 
| 177f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard S_STACK_SIZE(MFI->StackSize), 4); 178f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); 179f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); 180e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard 181e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard if (MFI->ShaderType == ShaderType::COMPUTE) { 182e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); 183e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); 184e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard } 185141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune} 186141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, 18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned &NumSGPR, 18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned &NumVGPR) const { 190f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned MaxSGPR = 0; 191f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned MaxVGPR = 0; 192f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool VCCUsed = false; 193f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const SIRegisterInfo * RI = 194f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); 195f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 196f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 197f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 198f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 199f98f2ce29e6e2996fa58f38979143eceaa818335Tom 
Stellard for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 200f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard I != E; ++I) { 201f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 202f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 203f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned numOperands = MI.getNumOperands(); 204f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 20529a651af8a4b522daf1f9102c93e4c8ecc2ef3c2Matt Arsenault MachineOperand &MO = MI.getOperand(op_idx); 206f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned width = 0; 207f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool isSGPR = false; 20836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 209f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!MO.isReg()) { 210f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 211f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 21236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned reg = MO.getReg(); 21336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || 21436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines reg == AMDGPU::VCC_HI) { 215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard VCCUsed = true; 216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 2188305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault 219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (reg) { 220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 2218305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault case AMDGPU::SCC: 222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::EXEC: 223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::M0: 224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 
225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (AMDGPU::SReg_32RegClass.contains(reg)) { 228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 229f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 1; 230f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_32RegClass.contains(reg)) { 231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 1; 233f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_64RegClass.contains(reg)) { 234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 2; 236f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_64RegClass.contains(reg)) { 237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 2; 2394d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig } else if (AMDGPU::VReg_96RegClass.contains(reg)) { 2404d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig isSGPR = false; 2414d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig width = 3; 242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_128RegClass.contains(reg)) { 243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 4; 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_128RegClass.contains(reg)) { 246f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 4; 248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_256RegClass.contains(reg)) { 249f98f2ce29e6e2996fa58f38979143eceaa818335Tom 
Stellard isSGPR = true; 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 8; 25136ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard } else if (AMDGPU::VReg_256RegClass.contains(reg)) { 25236ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard isSGPR = false; 25336ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard width = 8; 254f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard } else if (AMDGPU::SReg_512RegClass.contains(reg)) { 255f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard isSGPR = true; 256f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard width = 16; 25736ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard } else if (AMDGPU::VReg_512RegClass.contains(reg)) { 25836ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard isSGPR = false; 25936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard width = 16; 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 26136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Unknown register class"); 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 26336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned hwReg = RI->getEncodingValue(reg) & 0xff; 26436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned maxUsed = hwReg + width - 1; 265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (isSGPR) { 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; 267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 268f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxVGPR = maxUsed > MaxVGPR ? 
maxUsed : MaxVGPR; 269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 27336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 27436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (VCCUsed) 275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxSGPR += 2; 27636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 27736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines NumSGPR = MaxSGPR; 27836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines NumVGPR = MaxVGPR; 27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 28136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out, 28236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineFunction &MF) const { 28336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR); 28436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 28536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 28636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, 28736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const SIProgramInfo &KernelInfo) { 28836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 28936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 29036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 2919a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard unsigned RsrcReg; 2929a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard switch (MFI->ShaderType) { 2939a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard default: // Fall through 2949a256300f8f61937f5f7a148b9cb09936d103a97Tom 
Stellard case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; 2959a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; 2969a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; 2979a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; 2989a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard } 2999a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard 3009a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard OutStreamer.EmitIntValue(RsrcReg, 4); 30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | 30236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); 303a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer 30454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard unsigned LDSAlignShift; 30554328c772c5519e56c13667c2b1d1e830580c44dTom Stellard if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { 30654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard // LDS is allocated in 64 dword blocks 30754328c772c5519e56c13667c2b1d1e830580c44dTom Stellard LDSAlignShift = 8; 30854328c772c5519e56c13667c2b1d1e830580c44dTom Stellard } else { 30954328c772c5519e56c13667c2b1d1e830580c44dTom Stellard // LDS is allocated in 128 dword blocks 31054328c772c5519e56c13667c2b1d1e830580c44dTom Stellard LDSAlignShift = 9; 31154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard } 31254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard unsigned LDSBlocks = 31354328c772c5519e56c13667c2b1d1e830580c44dTom Stellard RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift; 31454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard 315a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer if (MFI->ShaderType == ShaderType::COMPUTE) { 
316a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); 31754328c772c5519e56c13667c2b1d1e830580c44dTom Stellard OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4); 318a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer } 3199a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard if (MFI->ShaderType == ShaderType::PIXEL) { 320a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); 32154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4); 3229a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); 3239a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); 3249a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard } 325f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 326