AMDGPUAsmPrinter.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// 12/// The AMDGPUAsmPrinter is used to print both assembly string and also binary 13/// code. When passed an MCAsmStreamer it prints assembly and when passed 14/// an MCObjectStreamer it outputs binary code. 15// 16//===----------------------------------------------------------------------===// 17// 18 19 20#include "AMDGPUAsmPrinter.h" 21#include "AMDGPU.h" 22#include "R600Defines.h" 23#include "R600MachineFunctionInfo.h" 24#include "R600RegisterInfo.h" 25#include "SIDefines.h" 26#include "SIMachineFunctionInfo.h" 27#include "SIRegisterInfo.h" 28#include "llvm/MC/MCContext.h" 29#include "llvm/MC/MCSectionELF.h" 30#include "llvm/MC/MCStreamer.h" 31#include "llvm/Support/ELF.h" 32#include "llvm/Support/MathExtras.h" 33#include "llvm/Support/TargetRegistry.h" 34#include "llvm/Target/TargetLoweringObjectFile.h" 35 36using namespace llvm; 37 38 39static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, 40 MCStreamer &Streamer) { 41 return new AMDGPUAsmPrinter(tm, Streamer); 42} 43 44extern "C" void LLVMInitializeR600AsmPrinter() { 45 TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass); 46} 47 48AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) 49 : AsmPrinter(TM, Streamer) { 50 DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode(); 51} 52 53bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { 54 SetupMachineFunction(MF); 55 56 OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':')); 57 58 MCContext &Context = getObjFileLowering().getContext(); 59 const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config", 60 ELF::SHT_PROGBITS, 0, 61 SectionKind::getReadOnly()); 62 OutStreamer.SwitchSection(ConfigSection); 63 64 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 65 SIProgramInfo KernelInfo; 66 if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 67 findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR); 68 EmitProgramInfoSI(MF, KernelInfo); 69 } else { 70 EmitProgramInfoR600(MF); 71 } 72 73 DisasmLines.clear(); 74 HexLines.clear(); 75 DisasmLineMaxLen = 0; 76 77 OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); 78 EmitFunctionBody(); 79 80 if (isVerbose()) { 81 const MCSectionELF *CommentSection 82 = Context.getELFSection(".AMDGPU.csdata", 83 ELF::SHT_PROGBITS, 0, 84 SectionKind::getReadOnly()); 85 OutStreamer.SwitchSection(CommentSection); 86 87 if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { 88 OutStreamer.emitRawComment(" Kernel info:", false); 89 OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), 90 false); 91 OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), 92 false); 93 } else { 94 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 95 OutStreamer.emitRawComment( 96 Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize))); 97 } 98 } 99 100 if (STM.dumpCode()) { 101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 102 MF.dump(); 103#endif 104 105 if (DisasmEnabled) { 106 OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm", 107 ELF::SHT_NOTE, 0, 108 SectionKind::getReadOnly())); 109 110 for (size_t i = 0; i < DisasmLines.size(); ++i) { 111 std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' '); 112 Comment += " ; " + HexLines[i] + "\n"; 113 114 OutStreamer.EmitBytes(StringRef(DisasmLines[i])); 115 OutStreamer.EmitBytes(StringRef(Comment)); 116 } 117 } 118 } 119 120 return false; 121} 122 123void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { 124 unsigned MaxGPR = 0; 125 bool killPixel = false; 126 const R600RegisterInfo * RI = 127 static_cast<const R600RegisterInfo*>(TM.getRegisterInfo()); 128 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 129 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 130 131 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 132 BB != BB_E; ++BB) { 133 MachineBasicBlock &MBB = *BB; 134 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 135 I != E; ++I) { 136 MachineInstr &MI = *I; 137 if (MI.getOpcode() == AMDGPU::KILLGT) 138 killPixel = true; 139 unsigned numOperands = MI.getNumOperands(); 140 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 141 MachineOperand & MO = MI.getOperand(op_idx); 142 if (!MO.isReg()) 143 continue; 144 unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff; 145 146 // Register with value > 127 aren't GPR 147 if (HWReg > 127) 148 continue; 149 MaxGPR = std::max(MaxGPR, HWReg); 150 } 151 } 152 } 153 154 unsigned RsrcReg; 155 if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) { 156 // Evergreen / Northern Islands 157 switch (MFI->ShaderType) { 158 default: // Fall through 159 case ShaderType::COMPUTE: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break; 160 case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break; 161 case ShaderType::PIXEL: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break; 162 case ShaderType::VERTEX: RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break; 163 } 164 } else { 165 // R600 / R700 166 switch (MFI->ShaderType) { 167 default: // Fall through 168 case ShaderType::GEOMETRY: // Fall through 169 case ShaderType::COMPUTE: // Fall through 170 case ShaderType::VERTEX: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break; 171 case ShaderType::PIXEL: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break; 172 } 173 } 174 175 OutStreamer.EmitIntValue(RsrcReg, 4); 176 OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) | 177 S_STACK_SIZE(MFI->StackSize), 4); 178 OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4); 179 OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4); 180 181 if (MFI->ShaderType == ShaderType::COMPUTE) { 182 OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4); 183 OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4); 184 } 185} 186 187void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, 188 unsigned &NumSGPR, 189 unsigned &NumVGPR) const { 190 unsigned MaxSGPR = 0; 191 unsigned MaxVGPR = 0; 192 bool VCCUsed = false; 193 const SIRegisterInfo * RI = 194 static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); 195 196 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 197 BB != BB_E; ++BB) { 198 MachineBasicBlock &MBB = *BB; 199 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 200 I != E; ++I) { 201 MachineInstr &MI = *I; 202 203 unsigned numOperands = MI.getNumOperands(); 204 for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { 205 MachineOperand &MO = MI.getOperand(op_idx); 206 unsigned width = 0; 207 bool isSGPR = false; 208 209 if (!MO.isReg()) { 210 continue; 211 } 212 unsigned reg = MO.getReg(); 213 if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || 214 reg == AMDGPU::VCC_HI) { 215 VCCUsed = true; 216 continue; 217 } 218 219 switch (reg) { 220 default: break; 221 case AMDGPU::SCC: 222 case AMDGPU::EXEC: 223 case AMDGPU::M0: 224 continue; 225 } 226 227 if (AMDGPU::SReg_32RegClass.contains(reg)) { 228 isSGPR = true; 229 width = 1; 230 } else if (AMDGPU::VReg_32RegClass.contains(reg)) { 231 isSGPR = false; 232 width = 1; 233 } else if (AMDGPU::SReg_64RegClass.contains(reg)) { 234 isSGPR = true; 235 width = 2; 236 } else if (AMDGPU::VReg_64RegClass.contains(reg)) { 237 isSGPR = false; 238 width = 2; 239 } else if (AMDGPU::VReg_96RegClass.contains(reg)) { 240 isSGPR = false; 241 width = 3; 242 } else if (AMDGPU::SReg_128RegClass.contains(reg)) { 243 isSGPR = true; 244 width = 4; 245 } else if (AMDGPU::VReg_128RegClass.contains(reg)) { 246 isSGPR = false; 247 width = 4; 248 } else if (AMDGPU::SReg_256RegClass.contains(reg)) { 249 isSGPR = true; 250 width = 8; 251 } else if (AMDGPU::VReg_256RegClass.contains(reg)) { 252 isSGPR = false; 253 width = 8; 254 } else if (AMDGPU::SReg_512RegClass.contains(reg)) { 255 isSGPR = true; 256 width = 16; 257 } else if (AMDGPU::VReg_512RegClass.contains(reg)) { 258 isSGPR = false; 259 width = 16; 260 } else { 261 llvm_unreachable("Unknown register class"); 262 } 263 unsigned hwReg = RI->getEncodingValue(reg) & 0xff; 264 unsigned maxUsed = hwReg + width - 1; 265 if (isSGPR) { 266 MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR; 267 } else { 268 MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR; 269 } 270 } 271 } 272 } 273 274 if (VCCUsed) 275 MaxSGPR += 2; 276 277 NumSGPR = MaxSGPR; 278 NumVGPR = MaxVGPR; 279} 280 281void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out, 282 MachineFunction &MF) const { 283 findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR); 284} 285 286void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, 287 const SIProgramInfo &KernelInfo) { 288 const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); 289 290 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 291 unsigned RsrcReg; 292 switch (MFI->ShaderType) { 293 default: // Fall through 294 case ShaderType::COMPUTE: RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break; 295 case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break; 296 case ShaderType::PIXEL: RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break; 297 case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; 298 } 299 300 OutStreamer.EmitIntValue(RsrcReg, 4); 301 OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | 302 S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); 303 304 unsigned LDSAlignShift; 305 if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { 306 // LDS is allocated in 64 dword blocks 307 LDSAlignShift = 8; 308 } else { 309 // LDS is allocated in 128 dword blocks 310 LDSAlignShift = 9; 311 } 312 unsigned LDSBlocks = 313 RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift; 314 315 if (MFI->ShaderType == ShaderType::COMPUTE) { 316 OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4); 317 OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4); 318 } 319 if (MFI->ShaderType == ShaderType::PIXEL) { 320 OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); 321 OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4); 322 OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); 323 OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); 324 } 325} 326