AMDGPUAsmPrinter.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer  -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11///
/// The AMDGPUAsmPrinter is used to emit both textual assembly and binary
/// code.  When passed an MCAsmStreamer it prints assembly, and when passed
/// an MCObjectStreamer it outputs binary code.
15//
16//===----------------------------------------------------------------------===//
17//
18
19
20#include "AMDGPUAsmPrinter.h"
21#include "AMDGPU.h"
22#include "R600Defines.h"
23#include "R600MachineFunctionInfo.h"
24#include "R600RegisterInfo.h"
25#include "SIDefines.h"
26#include "SIMachineFunctionInfo.h"
27#include "SIRegisterInfo.h"
28#include "llvm/MC/MCContext.h"
29#include "llvm/MC/MCSectionELF.h"
30#include "llvm/MC/MCStreamer.h"
31#include "llvm/Support/ELF.h"
32#include "llvm/Support/MathExtras.h"
33#include "llvm/Support/TargetRegistry.h"
34#include "llvm/Target/TargetLoweringObjectFile.h"
35
36using namespace llvm;
37
38
39static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
40                                              MCStreamer &Streamer) {
41  return new AMDGPUAsmPrinter(tm, Streamer);
42}
43
44extern "C" void LLVMInitializeR600AsmPrinter() {
45  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
46}
47
48AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
49    : AsmPrinter(TM, Streamer) {
50  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
51}
52
53bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
54  SetupMachineFunction(MF);
55
56  OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
57
58  MCContext &Context = getObjFileLowering().getContext();
59  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
60                                              ELF::SHT_PROGBITS, 0,
61                                              SectionKind::getReadOnly());
62  OutStreamer.SwitchSection(ConfigSection);
63
64  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
65  SIProgramInfo KernelInfo;
66  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
67    findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR);
68    EmitProgramInfoSI(MF, KernelInfo);
69  } else {
70    EmitProgramInfoR600(MF);
71  }
72
73  DisasmLines.clear();
74  HexLines.clear();
75  DisasmLineMaxLen = 0;
76
77  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
78  EmitFunctionBody();
79
80  if (isVerbose()) {
81    const MCSectionELF *CommentSection
82      = Context.getELFSection(".AMDGPU.csdata",
83                              ELF::SHT_PROGBITS, 0,
84                              SectionKind::getReadOnly());
85    OutStreamer.SwitchSection(CommentSection);
86
87    if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
88      OutStreamer.emitRawComment(" Kernel info:", false);
89      OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
90                                 false);
91      OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
92                                 false);
93    } else {
94      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
95      OutStreamer.emitRawComment(
96        Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
97    }
98  }
99
100  if (STM.dumpCode()) {
101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102    MF.dump();
103#endif
104
105    if (DisasmEnabled) {
106      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
107                                                  ELF::SHT_NOTE, 0,
108                                                  SectionKind::getReadOnly()));
109
110      for (size_t i = 0; i < DisasmLines.size(); ++i) {
111        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
112        Comment += " ; " + HexLines[i] + "\n";
113
114        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
115        OutStreamer.EmitBytes(StringRef(Comment));
116      }
117    }
118  }
119
120  return false;
121}
122
123void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
124  unsigned MaxGPR = 0;
125  bool killPixel = false;
126  const R600RegisterInfo * RI =
127                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
128  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
129  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
130
131  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
132                                                  BB != BB_E; ++BB) {
133    MachineBasicBlock &MBB = *BB;
134    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
135                                                    I != E; ++I) {
136      MachineInstr &MI = *I;
137      if (MI.getOpcode() == AMDGPU::KILLGT)
138        killPixel = true;
139      unsigned numOperands = MI.getNumOperands();
140      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
141        MachineOperand & MO = MI.getOperand(op_idx);
142        if (!MO.isReg())
143          continue;
144        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
145
146        // Register with value > 127 aren't GPR
147        if (HWReg > 127)
148          continue;
149        MaxGPR = std::max(MaxGPR, HWReg);
150      }
151    }
152  }
153
154  unsigned RsrcReg;
155  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
156    // Evergreen / Northern Islands
157    switch (MFI->ShaderType) {
158    default: // Fall through
159    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
160    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
161    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
162    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
163    }
164  } else {
165    // R600 / R700
166    switch (MFI->ShaderType) {
167    default: // Fall through
168    case ShaderType::GEOMETRY: // Fall through
169    case ShaderType::COMPUTE:  // Fall through
170    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
171    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
172    }
173  }
174
175  OutStreamer.EmitIntValue(RsrcReg, 4);
176  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
177                           S_STACK_SIZE(MFI->StackSize), 4);
178  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
179  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
180
181  if (MFI->ShaderType == ShaderType::COMPUTE) {
182    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
183    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
184  }
185}
186
187void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
188                                              unsigned &NumSGPR,
189                                              unsigned &NumVGPR) const {
190  unsigned MaxSGPR = 0;
191  unsigned MaxVGPR = 0;
192  bool VCCUsed = false;
193  const SIRegisterInfo * RI =
194                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
195
196  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
197                                                  BB != BB_E; ++BB) {
198    MachineBasicBlock &MBB = *BB;
199    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
200                                                    I != E; ++I) {
201      MachineInstr &MI = *I;
202
203      unsigned numOperands = MI.getNumOperands();
204      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
205        MachineOperand &MO = MI.getOperand(op_idx);
206        unsigned width = 0;
207        bool isSGPR = false;
208
209        if (!MO.isReg()) {
210          continue;
211        }
212        unsigned reg = MO.getReg();
213        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
214	    reg == AMDGPU::VCC_HI) {
215          VCCUsed = true;
216          continue;
217        }
218
219        switch (reg) {
220        default: break;
221        case AMDGPU::SCC:
222        case AMDGPU::EXEC:
223        case AMDGPU::M0:
224          continue;
225        }
226
227        if (AMDGPU::SReg_32RegClass.contains(reg)) {
228          isSGPR = true;
229          width = 1;
230        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
231          isSGPR = false;
232          width = 1;
233        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
234          isSGPR = true;
235          width = 2;
236        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
237          isSGPR = false;
238          width = 2;
239        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
240          isSGPR = false;
241          width = 3;
242        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
243          isSGPR = true;
244          width = 4;
245        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
246          isSGPR = false;
247          width = 4;
248        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
249          isSGPR = true;
250          width = 8;
251        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
252          isSGPR = false;
253          width = 8;
254        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
255          isSGPR = true;
256          width = 16;
257        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
258          isSGPR = false;
259          width = 16;
260        } else {
261          llvm_unreachable("Unknown register class");
262        }
263        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
264        unsigned maxUsed = hwReg + width - 1;
265        if (isSGPR) {
266          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
267        } else {
268          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
269        }
270      }
271    }
272  }
273
274  if (VCCUsed)
275    MaxSGPR += 2;
276
277  NumSGPR = MaxSGPR;
278  NumVGPR = MaxVGPR;
279}
280
281void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out,
282                                        MachineFunction &MF) const {
283  findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR);
284}
285
286void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
287                                         const SIProgramInfo &KernelInfo) {
288  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
289
290  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
291  unsigned RsrcReg;
292  switch (MFI->ShaderType) {
293  default: // Fall through
294  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
295  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
296  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
297  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
298  }
299
300  OutStreamer.EmitIntValue(RsrcReg, 4);
301  OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
302                           S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
303
304  unsigned LDSAlignShift;
305  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
306    // LDS is allocated in 64 dword blocks
307    LDSAlignShift = 8;
308  } else {
309    // LDS is allocated in 128 dword blocks
310    LDSAlignShift = 9;
311  }
312  unsigned LDSBlocks =
313          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
314
315  if (MFI->ShaderType == ShaderType::COMPUTE) {
316    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
317    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
318  }
319  if (MFI->ShaderType == ShaderType::PIXEL) {
320    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
321    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
322    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
323    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
324  }
325}
326