//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
/// code.  When passed an MCAsmStreamer it prints assembly and when passed
/// an MCObjectStreamer it outputs binary code.
//
//===----------------------------------------------------------------------===//
//


#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;

// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
// are used.
//
// The denormal mode here should match what is reported by the OpenCL runtime
// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
//
// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
// precision, and leaves single precision to flush all and does not report
// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
// CL_FP_DENORM for both.
51cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesstatic uint32_t getFPMode(MachineFunction &) { 52cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | 53cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | 54cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) | 55cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); 56cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines} 57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardstatic AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, 59f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MCStreamer &Streamer) { 60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return new AMDGPUAsmPrinter(tm, Streamer); 61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 62f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 63f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardextern "C" void LLVMInitializeR600AsmPrinter() { 64f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); 65f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 66f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 67f9318673178309288f9320efe02d529419ac32a2Tom StellardAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) 6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines : AsmPrinter(TM, Streamer) { 6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode(); 70f9318673178309288f9320efe02d529419ac32a2Tom Stellard} 71f9318673178309288f9320efe02d529419ac32a2Tom Stellard 72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 
SetupMachineFunction(MF); 7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':')); 76141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 77f9318673178309288f9320efe02d529419ac32a2Tom Stellard MCContext &Context = getObjFileLowering().getContext(); 78f9318673178309288f9320efe02d529419ac32a2Tom Stellard const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", 7987cba4a4c1d5b8b026c83b0916b37255600ecd5fTom Stellard ELF::SHT_PROGBITS, 0, 80141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune SectionKind::getReadOnly()); 81141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune OutStreamer.SwitchSection(ConfigSection); 8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 83f9318673178309288f9320efe02d529419ac32a2Tom Stellard const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SIProgramInfo KernelInfo; 853ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines getSIProgramInfo(KernelInfo, MF); 8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EmitProgramInfoSI(MF, KernelInfo); 88141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } else { 89141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune EmitProgramInfoR600(MF); 90f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 91f9318673178309288f9320efe02d529419ac32a2Tom Stellard 92f9318673178309288f9320efe02d529419ac32a2Tom Stellard DisasmLines.clear(); 93f9318673178309288f9320efe02d529419ac32a2Tom Stellard HexLines.clear(); 94f9318673178309288f9320efe02d529419ac32a2Tom Stellard DisasmLineMaxLen = 0; 95f9318673178309288f9320efe02d529419ac32a2Tom Stellard 96bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); 
97f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard EmitFunctionBody(); 98f9318673178309288f9320efe02d529419ac32a2Tom Stellard 9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (isVerbose()) { 10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MCSectionELF *CommentSection 10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines = Context.getELFSection(".AMDGPU.csdata", 10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ELF::SHT_PROGBITS, 0, 10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SectionKind::getReadOnly()); 10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.SwitchSection(CommentSection); 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 106dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" Kernel info:", false); 108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen), 109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines false); 11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), 11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines false); 11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), 11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines false); 114cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), 115cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines false); 116cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), 117cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines false); 11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else { 
11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines OutStreamer.emitRawComment( 12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize))); 12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 125f9318673178309288f9320efe02d529419ac32a2Tom Stellard if (STM.dumpCode()) { 126f9318673178309288f9320efe02d529419ac32a2Tom Stellard#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 127f9318673178309288f9320efe02d529419ac32a2Tom Stellard MF.dump(); 128f9318673178309288f9320efe02d529419ac32a2Tom Stellard#endif 129f9318673178309288f9320efe02d529419ac32a2Tom Stellard 130f9318673178309288f9320efe02d529419ac32a2Tom Stellard if (DisasmEnabled) { 131f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm", 132f9318673178309288f9320efe02d529419ac32a2Tom Stellard ELF::SHT_NOTE, 0, 133f9318673178309288f9320efe02d529419ac32a2Tom Stellard SectionKind::getReadOnly())); 134f9318673178309288f9320efe02d529419ac32a2Tom Stellard 135f9318673178309288f9320efe02d529419ac32a2Tom Stellard for (size_t i = 0; i < DisasmLines.size(); ++i) { 136f9318673178309288f9320efe02d529419ac32a2Tom Stellard std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); 137f9318673178309288f9320efe02d529419ac32a2Tom Stellard Comment += " ; " + HexLines[i] + "\n"; 138f9318673178309288f9320efe02d529419ac32a2Tom Stellard 139f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.EmitBytes(StringRef(DisasmLines[i])); 140f9318673178309288f9320efe02d529419ac32a2Tom Stellard OutStreamer.EmitBytes(StringRef(Comment)); 141f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 142f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 
143f9318673178309288f9320efe02d529419ac32a2Tom Stellard } 144f9318673178309288f9320efe02d529419ac32a2Tom Stellard 145f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return false; 146f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 147f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 148141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeunevoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { 149141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned MaxGPR = 0; 15086cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune bool killPixel = false; 151141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune const R600RegisterInfo * RI = 152141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); 1532a74639bc7713146b1182328892807c421c84265Vincent Lejeune R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 154f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 155141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 156141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 157141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune BB != BB_E; ++BB) { 158141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineBasicBlock &MBB = *BB; 159141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 160141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune I != E; ++I) { 161141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineInstr &MI = *I; 16286cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune if (MI.getOpcode() == AMDGPU::KILLGT) 16386cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune killPixel = true; 164141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned numOperands = MI.getNumOperands(); 
165141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 166141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MachineOperand & MO = MI.getOperand(op_idx); 167141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune if (!MO.isReg()) 168141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune continue; 169141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; 170141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 171141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune // Register with value > 127 aren't GPR 172141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune if (HWReg > 127) 173141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune continue; 174141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune MaxGPR = std::max(MaxGPR, HWReg); 175141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 176141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 177141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune } 178f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard 179f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard unsigned RsrcReg; 1803ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { 181f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard // Evergreen / Northern Islands 182f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard switch (MFI->ShaderType) { 183f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard default: // Fall through 184f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; 185f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; 186f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; 187f07b5373d7493d29cd758ababf135c2d0d8da127Tom 
Stellard case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; 188f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 189f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } else { 190f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard // R600 / R700 191f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard switch (MFI->ShaderType) { 192f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard default: // Fall through 193f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::GEOMETRY: // Fall through 194f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::COMPUTE: // Fall through 195f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; 196f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; 197f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 198f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard } 199f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard 200f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(RsrcReg, 4); 201f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | 202f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard S_STACK_SIZE(MFI->StackSize), 4); 203f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); 204f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); 205e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard 206e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard if (MFI->ShaderType == ShaderType::COMPUTE) { 207e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); 208e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); 
209e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard } 210141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune} 211141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune 212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, 213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineFunction &MF) const { 214dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines uint64_t CodeSize = 0; 215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned MaxSGPR = 0; 216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned MaxVGPR = 0; 217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool VCCUsed = false; 218f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard const SIRegisterInfo * RI = 219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); 220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 221f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BB != BB_E; ++BB) { 223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineBasicBlock &MBB = *BB; 224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard I != E; ++I) { 226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr &MI = *I; 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // TODO: CodeSize should account for multiple functions. 
229dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines CodeSize += MI.getDesc().Size; 230dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned numOperands = MI.getNumOperands(); 232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 23329a651af8a4b522daf1f9102c93e4c8ecc2ef3c2Matt Arsenault MachineOperand &MO = MI.getOperand(op_idx); 234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard unsigned width = 0; 235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard bool isSGPR = false; 23636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (!MO.isReg()) { 238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 24036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned reg = MO.getReg(); 24136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || 24236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines reg == AMDGPU::VCC_HI) { 243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard VCCUsed = true; 244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 2468305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault 247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (reg) { 248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: break; 2498305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault case AMDGPU::SCC: 250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::EXEC: 251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::M0: 252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard continue; 253f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 255f98f2ce29e6e2996fa58f38979143eceaa818335Tom 
Stellard if (AMDGPU::SReg_32RegClass.contains(reg)) { 256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 1; 258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_32RegClass.contains(reg)) { 259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 1; 261f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_64RegClass.contains(reg)) { 262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 263f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 2; 264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_64RegClass.contains(reg)) { 265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 2; 2674d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig } else if (AMDGPU::VReg_96RegClass.contains(reg)) { 2684d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig isSGPR = false; 2694d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig width = 3; 270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_128RegClass.contains(reg)) { 271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 4; 273f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::VReg_128RegClass.contains(reg)) { 274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = false; 275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 4; 276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else if (AMDGPU::SReg_256RegClass.contains(reg)) { 277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard isSGPR = true; 278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard width = 8; 27936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard } else if 
(AMDGPU::VReg_256RegClass.contains(reg)) { 28036ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard isSGPR = false; 28136ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard width = 8; 282f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard } else if (AMDGPU::SReg_512RegClass.contains(reg)) { 283f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard isSGPR = true; 284f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard width = 16; 28536ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard } else if (AMDGPU::VReg_512RegClass.contains(reg)) { 28636ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard isSGPR = false; 28736ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard width = 16; 288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 28936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Unknown register class"); 290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 29136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned hwReg = RI->getEncodingValue(reg) & 0xff; 29236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned maxUsed = hwReg + width - 1; 293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (isSGPR) { 294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; 295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxVGPR = maxUsed > MaxVGPR ? 
maxUsed : MaxVGPR; 297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 30236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (VCCUsed) 303f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MaxSGPR += 2; 30436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ProgInfo.NumVGPR = MaxVGPR; 306cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ProgInfo.NumSGPR = MaxSGPR; 307cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 308cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode 309cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // register. 310cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ProgInfo.FloatMode = getFPMode(MF); 311cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 312cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // XXX: Not quite sure what this does, but sc seems to unset this. 313cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ProgInfo.IEEEMode = 0; 314cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 315cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Do not clamp NAN to 0. 
316cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ProgInfo.DX10Clamp = 0; 317cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 318cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ProgInfo.CodeLen = CodeSize; 31936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 32036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 32136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, 32236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const SIProgramInfo &KernelInfo) { 32336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 32436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 325cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 3269a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard unsigned RsrcReg; 3279a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard switch (MFI->ShaderType) { 3289a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard default: // Fall through 3299a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; 3309a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; 3319a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; 3329a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; 3339a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard } 3349a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard 33554328c772c5519e56c13667c2b1d1e830580c44dTom Stellard unsigned LDSAlignShift; 33654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { 337cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // LDS is allocated 
in 64 dword blocks. 33854328c772c5519e56c13667c2b1d1e830580c44dTom Stellard LDSAlignShift = 8; 33954328c772c5519e56c13667c2b1d1e830580c44dTom Stellard } else { 340cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // LDS is allocated in 128 dword blocks. 34154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard LDSAlignShift = 9; 34254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard } 343cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 34454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard unsigned LDSBlocks = 345cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift; 34654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard 347a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer if (MFI->ShaderType == ShaderType::COMPUTE) { 348cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4); 349cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 350cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines const uint32_t ComputePGMRSrc1 = 351cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_VGPRS(KernelInfo.NumVGPR / 4) | 352cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_SGPRS(KernelInfo.NumSGPR / 8) | 353cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_PRIORITY(KernelInfo.Priority) | 354cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_FLOAT_MODE(KernelInfo.FloatMode) | 355cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_PRIV(KernelInfo.Priv) | 356cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) | 357cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_IEEE_MODE(KernelInfo.DebugMode) | 358cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B848_IEEE_MODE(KernelInfo.IEEEMode); 359cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 360cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 
OutStreamer.EmitIntValue(ComputePGMRSrc1, 4); 361cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 362a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); 36354328c772c5519e56c13667c2b1d1e830580c44dTom Stellard OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4); 364cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } else { 365cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines OutStreamer.EmitIntValue(RsrcReg, 4); 366cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | 367cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); 368a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer } 369cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 3709a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard if (MFI->ShaderType == ShaderType::PIXEL) { 371a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); 37254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4); 3739a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); 3749a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); 3759a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard } 376f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 377