AMDGPUAsmPrinter.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//                     The LLVM Compiler Infrastructure
4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source
6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details.
7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file
11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard///
/// The AMDGPUAsmPrinter is used to print both assembly strings and binary
/// code.  When passed an MCAsmStreamer it prints assembly, and when passed
/// an MCObjectStreamer it outputs binary code.
15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUAsmPrinter.h"
21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h"
22f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard#include "R600Defines.h"
232a74639bc7713146b1182328892807c421c84265Vincent Lejeune#include "R600MachineFunctionInfo.h"
24141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune#include "R600RegisterInfo.h"
255c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIDefines.h"
265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIMachineFunctionInfo.h"
275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIRegisterInfo.h"
28bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCContext.h"
29bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCSectionELF.h"
30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/MC/MCStreamer.h"
31bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/Support/ELF.h"
32e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard#include "llvm/Support/MathExtras.h"
33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/Support/TargetRegistry.h"
3458a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "llvm/Target/TargetLoweringObjectFile.h"
35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm;
37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
39f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardstatic AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
40f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                              MCStreamer &Streamer) {
41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return new AMDGPUAsmPrinter(tm, Streamer);
42f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
43f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
44f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardextern "C" void LLVMInitializeR600AsmPrinter() {
45f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
46f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
47f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
48f9318673178309288f9320efe02d529419ac32a2Tom StellardAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
4936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    : AsmPrinter(TM, Streamer) {
5036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
51f9318673178309288f9320efe02d529419ac32a2Tom Stellard}
52f9318673178309288f9320efe02d529419ac32a2Tom Stellard
53f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
54f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  SetupMachineFunction(MF);
5536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
5636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
57141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
58f9318673178309288f9320efe02d529419ac32a2Tom Stellard  MCContext &Context = getObjFileLowering().getContext();
59f9318673178309288f9320efe02d529419ac32a2Tom Stellard  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
6087cba4a4c1d5b8b026c83b0916b37255600ecd5fTom Stellard                                              ELF::SHT_PROGBITS, 0,
61141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                              SectionKind::getReadOnly());
62141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  OutStreamer.SwitchSection(ConfigSection);
6336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
64f9318673178309288f9320efe02d529419ac32a2Tom Stellard  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
6536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SIProgramInfo KernelInfo;
663ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
6736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR);
6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    EmitProgramInfoSI(MF, KernelInfo);
69141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  } else {
70141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    EmitProgramInfoR600(MF);
71f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
72f9318673178309288f9320efe02d529419ac32a2Tom Stellard
73f9318673178309288f9320efe02d529419ac32a2Tom Stellard  DisasmLines.clear();
74f9318673178309288f9320efe02d529419ac32a2Tom Stellard  HexLines.clear();
75f9318673178309288f9320efe02d529419ac32a2Tom Stellard  DisasmLineMaxLen = 0;
76f9318673178309288f9320efe02d529419ac32a2Tom Stellard
77bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
78f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  EmitFunctionBody();
79f9318673178309288f9320efe02d529419ac32a2Tom Stellard
8036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (isVerbose()) {
8136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    const MCSectionELF *CommentSection
8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      = Context.getELFSection(".AMDGPU.csdata",
8336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              ELF::SHT_PROGBITS, 0,
8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              SectionKind::getReadOnly());
8536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    OutStreamer.SwitchSection(CommentSection);
8636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
8836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" Kernel info:", false);
8936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
9036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 false);
9136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
9236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 false);
9336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else {
9436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
9536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(
9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
9736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
100f9318673178309288f9320efe02d529419ac32a2Tom Stellard  if (STM.dumpCode()) {
101f9318673178309288f9320efe02d529419ac32a2Tom Stellard#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
102f9318673178309288f9320efe02d529419ac32a2Tom Stellard    MF.dump();
103f9318673178309288f9320efe02d529419ac32a2Tom Stellard#endif
104f9318673178309288f9320efe02d529419ac32a2Tom Stellard
105f9318673178309288f9320efe02d529419ac32a2Tom Stellard    if (DisasmEnabled) {
106f9318673178309288f9320efe02d529419ac32a2Tom Stellard      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
107f9318673178309288f9320efe02d529419ac32a2Tom Stellard                                                  ELF::SHT_NOTE, 0,
108f9318673178309288f9320efe02d529419ac32a2Tom Stellard                                                  SectionKind::getReadOnly()));
109f9318673178309288f9320efe02d529419ac32a2Tom Stellard
110f9318673178309288f9320efe02d529419ac32a2Tom Stellard      for (size_t i = 0; i < DisasmLines.size(); ++i) {
111f9318673178309288f9320efe02d529419ac32a2Tom Stellard        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
112f9318673178309288f9320efe02d529419ac32a2Tom Stellard        Comment += " ; " + HexLines[i] + "\n";
113f9318673178309288f9320efe02d529419ac32a2Tom Stellard
114f9318673178309288f9320efe02d529419ac32a2Tom Stellard        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
115f9318673178309288f9320efe02d529419ac32a2Tom Stellard        OutStreamer.EmitBytes(StringRef(Comment));
116f9318673178309288f9320efe02d529419ac32a2Tom Stellard      }
117f9318673178309288f9320efe02d529419ac32a2Tom Stellard    }
118f9318673178309288f9320efe02d529419ac32a2Tom Stellard  }
119f9318673178309288f9320efe02d529419ac32a2Tom Stellard
120f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return false;
121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
122f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
123141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeunevoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
124141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  unsigned MaxGPR = 0;
12586cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune  bool killPixel = false;
126141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  const R600RegisterInfo * RI =
127141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
1282a74639bc7713146b1182328892807c421c84265Vincent Lejeune  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
129f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
130141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
131141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
132141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                                  BB != BB_E; ++BB) {
133141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    MachineBasicBlock &MBB = *BB;
134141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
135141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                                    I != E; ++I) {
136141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      MachineInstr &MI = *I;
13786cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune      if (MI.getOpcode() == AMDGPU::KILLGT)
13886cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune        killPixel = true;
139141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      unsigned numOperands = MI.getNumOperands();
140141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
141141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        MachineOperand & MO = MI.getOperand(op_idx);
142141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        if (!MO.isReg())
143141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune          continue;
144141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
145141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
146141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        // Register with value > 127 aren't GPR
147141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        if (HWReg > 127)
148141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune          continue;
149141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        MaxGPR = std::max(MaxGPR, HWReg);
150141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      }
151141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    }
152141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  }
153f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard
154f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  unsigned RsrcReg;
1553ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
156f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    // Evergreen / Northern Islands
157f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    switch (MFI->ShaderType) {
158f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    default: // Fall through
159f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
160f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
161f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
162f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
163f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    }
164f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  } else {
165f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    // R600 / R700
166f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    switch (MFI->ShaderType) {
167f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    default: // Fall through
168f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::GEOMETRY: // Fall through
169f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::COMPUTE:  // Fall through
170f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
171f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
172f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    }
173f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  }
174f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard
175f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(RsrcReg, 4);
176f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
177f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard                           S_STACK_SIZE(MFI->StackSize), 4);
178f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
179f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
180e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard
181e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard  if (MFI->ShaderType == ShaderType::COMPUTE) {
182e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
183e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
184e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard  }
185141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune}
186141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                              unsigned &NumSGPR,
18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                              unsigned &NumVGPR) const {
190f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  unsigned MaxSGPR = 0;
191f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  unsigned MaxVGPR = 0;
192f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  bool VCCUsed = false;
193f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  const SIRegisterInfo * RI =
194f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
195f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
196f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
197f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                                  BB != BB_E; ++BB) {
198f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    MachineBasicBlock &MBB = *BB;
199f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
200f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                                    I != E; ++I) {
201f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      MachineInstr &MI = *I;
202f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
203f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      unsigned numOperands = MI.getNumOperands();
204f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
20529a651af8a4b522daf1f9102c93e4c8ecc2ef3c2Matt Arsenault        MachineOperand &MO = MI.getOperand(op_idx);
206f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        unsigned width = 0;
207f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        bool isSGPR = false;
20836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
209f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (!MO.isReg()) {
210f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
211f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
21236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned reg = MO.getReg();
21336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
21436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines	    reg == AMDGPU::VCC_HI) {
215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          VCCUsed = true;
216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
2188305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault
219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        switch (reg) {
220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        default: break;
2218305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault        case AMDGPU::SCC:
222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        case AMDGPU::EXEC:
223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        case AMDGPU::M0:
224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (AMDGPU::SReg_32RegClass.contains(reg)) {
228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
229f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 1;
230f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 1;
233f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 2;
236f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 2;
2394d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
2404d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig          isSGPR = false;
2414d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig          width = 3;
242f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 4;
245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
246f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 4;
248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
249f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 8;
25136ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
25236ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          isSGPR = false;
25336ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          width = 8;
254f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
255f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard          isSGPR = true;
256f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard          width = 16;
25736ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
25836ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          isSGPR = false;
25936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          width = 16;
260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else {
26136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          llvm_unreachable("Unknown register class");
262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
26336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
26436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned maxUsed = hwReg + width - 1;
265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (isSGPR) {
266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
267f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else {
268f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
269f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      }
271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    }
272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
27336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
27436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (VCCUsed)
275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    MaxSGPR += 2;
27636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
27736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  NumSGPR = MaxSGPR;
27836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  NumVGPR = MaxVGPR;
27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
28136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out,
28236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                        MachineFunction &MF) const {
28336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR);
28436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
28536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
28636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
28736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                         const SIProgramInfo &KernelInfo) {
28836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
28936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
29036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2919a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  unsigned RsrcReg;
2929a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  switch (MFI->ShaderType) {
2939a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  default: // Fall through
2949a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
2959a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
2969a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
2979a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
2989a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  }
2999a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard
3009a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  OutStreamer.EmitIntValue(RsrcReg, 4);
30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
30236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                           S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
303a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer
30454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  unsigned LDSAlignShift;
30554328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
30654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    // LDS is allocated in 64 dword blocks
30754328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    LDSAlignShift = 8;
30854328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  } else {
30954328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    // LDS is allocated in 128 dword blocks
31054328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    LDSAlignShift = 9;
31154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  }
31254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  unsigned LDSBlocks =
31354328c772c5519e56c13667c2b1d1e830580c44dTom Stellard          RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
31454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard
315a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  if (MFI->ShaderType == ShaderType::COMPUTE) {
316a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
31754328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
318a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  }
3199a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  if (MFI->ShaderType == ShaderType::PIXEL) {
320a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
32154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
3229a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
3239a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
3249a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  }
325f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
326