AMDGPUAsmPrinter.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11///
12/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13/// code.  When passed an MCAsmStreamer it prints assembly and when passed
14/// an MCObjectStreamer it outputs binary code.
15//
16//===----------------------------------------------------------------------===//
17//
18
19
20#include "AMDGPUAsmPrinter.h"
21#include "AMDGPU.h"
22#include "R600Defines.h"
23#include "R600MachineFunctionInfo.h"
24#include "R600RegisterInfo.h"
25#include "SIDefines.h"
26#include "SIMachineFunctionInfo.h"
27#include "SIRegisterInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCSectionELF.h"
30#include "llvm/MC/MCStreamer.h"
31#include "llvm/Support/ELF.h"
32#include "llvm/Support/MathExtras.h"
33#include "llvm/Support/TargetRegistry.h"
34#include "llvm/Target/TargetLoweringObjectFile.h"
35
36using namespace llvm;
37
38
39static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
40                                              MCStreamer &Streamer) {
41  return new AMDGPUAsmPrinter(tm, Streamer);
42}
43
44extern "C" void LLVMInitializeR600AsmPrinter() {
45  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
46}
47
48AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
49    : AsmPrinter(TM, Streamer) {
50  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
51}
52
53bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
54  SetupMachineFunction(MF);
55
56  OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
57
58  MCContext &Context = getObjFileLowering().getContext();
59  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
60                                              ELF::SHT_PROGBITS, 0,
61                                              SectionKind::getReadOnly());
62  OutStreamer.SwitchSection(ConfigSection);
63
64  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
65  SIProgramInfo KernelInfo;
66  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
67    getSIProgramInfo(KernelInfo, MF);
68    EmitProgramInfoSI(MF, KernelInfo);
69  } else {
70    EmitProgramInfoR600(MF);
71  }
72
73  DisasmLines.clear();
74  HexLines.clear();
75  DisasmLineMaxLen = 0;
76
77  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
78  EmitFunctionBody();
79
80  if (isVerbose()) {
81    const MCSectionELF *CommentSection
82      = Context.getELFSection(".AMDGPU.csdata",
83                              ELF::SHT_PROGBITS, 0,
84                              SectionKind::getReadOnly());
85    OutStreamer.SwitchSection(CommentSection);
86
87    if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
88      OutStreamer.emitRawComment(" Kernel info:", false);
89      OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
90                                 false);
91      OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
92                                 false);
93      OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
94                                 false);
95    } else {
96      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
97      OutStreamer.emitRawComment(
98        Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
99    }
100  }
101
102  if (STM.dumpCode()) {
103#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
104    MF.dump();
105#endif
106
107    if (DisasmEnabled) {
108      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
109                                                  ELF::SHT_NOTE, 0,
110                                                  SectionKind::getReadOnly()));
111
112      for (size_t i = 0; i < DisasmLines.size(); ++i) {
113        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
114        Comment += " ; " + HexLines[i] + "\n";
115
116        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
117        OutStreamer.EmitBytes(StringRef(Comment));
118      }
119    }
120  }
121
122  return false;
123}
124
125void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
126  unsigned MaxGPR = 0;
127  bool killPixel = false;
128  const R600RegisterInfo * RI =
129                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
130  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
131  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
132
133  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
134                                                  BB != BB_E; ++BB) {
135    MachineBasicBlock &MBB = *BB;
136    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
137                                                    I != E; ++I) {
138      MachineInstr &MI = *I;
139      if (MI.getOpcode() == AMDGPU::KILLGT)
140        killPixel = true;
141      unsigned numOperands = MI.getNumOperands();
142      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
143        MachineOperand & MO = MI.getOperand(op_idx);
144        if (!MO.isReg())
145          continue;
146        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
147
148        // Register with value > 127 aren't GPR
149        if (HWReg > 127)
150          continue;
151        MaxGPR = std::max(MaxGPR, HWReg);
152      }
153    }
154  }
155
156  unsigned RsrcReg;
157  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
158    // Evergreen / Northern Islands
159    switch (MFI->ShaderType) {
160    default: // Fall through
161    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
162    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
163    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
164    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
165    }
166  } else {
167    // R600 / R700
168    switch (MFI->ShaderType) {
169    default: // Fall through
170    case ShaderType::GEOMETRY: // Fall through
171    case ShaderType::COMPUTE:  // Fall through
172    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
173    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
174    }
175  }
176
177  OutStreamer.EmitIntValue(RsrcReg, 4);
178  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
179                           S_STACK_SIZE(MFI->StackSize), 4);
180  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
181  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
182
183  if (MFI->ShaderType == ShaderType::COMPUTE) {
184    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
185    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
186  }
187}
188
189void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
190                                        MachineFunction &MF) const {
191  uint64_t CodeSize = 0;
192  unsigned MaxSGPR = 0;
193  unsigned MaxVGPR = 0;
194  bool VCCUsed = false;
195  const SIRegisterInfo * RI =
196                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
197
198  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
199                                                  BB != BB_E; ++BB) {
200    MachineBasicBlock &MBB = *BB;
201    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
202                                                    I != E; ++I) {
203      MachineInstr &MI = *I;
204
205      // TODO: CodeSize should account for multiple functions.
206      CodeSize += MI.getDesc().Size;
207
208      unsigned numOperands = MI.getNumOperands();
209      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
210        MachineOperand &MO = MI.getOperand(op_idx);
211        unsigned width = 0;
212        bool isSGPR = false;
213
214        if (!MO.isReg()) {
215          continue;
216        }
217        unsigned reg = MO.getReg();
218        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
219	    reg == AMDGPU::VCC_HI) {
220          VCCUsed = true;
221          continue;
222        }
223
224        switch (reg) {
225        default: break;
226        case AMDGPU::SCC:
227        case AMDGPU::EXEC:
228        case AMDGPU::M0:
229          continue;
230        }
231
232        if (AMDGPU::SReg_32RegClass.contains(reg)) {
233          isSGPR = true;
234          width = 1;
235        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
236          isSGPR = false;
237          width = 1;
238        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
239          isSGPR = true;
240          width = 2;
241        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
242          isSGPR = false;
243          width = 2;
244        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
245          isSGPR = false;
246          width = 3;
247        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
248          isSGPR = true;
249          width = 4;
250        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
251          isSGPR = false;
252          width = 4;
253        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
254          isSGPR = true;
255          width = 8;
256        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
257          isSGPR = false;
258          width = 8;
259        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
260          isSGPR = true;
261          width = 16;
262        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
263          isSGPR = false;
264          width = 16;
265        } else {
266          llvm_unreachable("Unknown register class");
267        }
268        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
269        unsigned maxUsed = hwReg + width - 1;
270        if (isSGPR) {
271          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
272        } else {
273          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
274        }
275      }
276    }
277  }
278
279  if (VCCUsed)
280    MaxSGPR += 2;
281
282  ProgInfo.CodeLen = CodeSize;
283  ProgInfo.NumSGPR = MaxSGPR;
284  ProgInfo.NumVGPR = MaxVGPR;
285}
286
287void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
288                                         const SIProgramInfo &KernelInfo) {
289  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
290
291  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
292  unsigned RsrcReg;
293  switch (MFI->ShaderType) {
294  default: // Fall through
295  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
296  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
297  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
298  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
299  }
300
301  OutStreamer.EmitIntValue(RsrcReg, 4);
302  OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
303                           S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
304
305  unsigned LDSAlignShift;
306  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
307    // LDS is allocated in 64 dword blocks
308    LDSAlignShift = 8;
309  } else {
310    // LDS is allocated in 128 dword blocks
311    LDSAlignShift = 9;
312  }
313  unsigned LDSBlocks =
314          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
315
316  if (MFI->ShaderType == ShaderType::COMPUTE) {
317    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
318    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
319  }
320  if (MFI->ShaderType == ShaderType::PIXEL) {
321    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
322    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
323    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
324    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
325  }
326}
327