//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer  -------------------===//
2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//                     The LLVM Compiler Infrastructure
4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source
6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details.
7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file
11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard///
12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// code.  When passed an MCAsmStreamer it prints assembly and when passed
14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// an MCObjectStreamer it outputs binary code.
15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUAsmPrinter.h"
21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPU.h"
22cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "AMDGPUSubtarget.h"
23f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard#include "R600Defines.h"
242a74639bc7713146b1182328892807c421c84265Vincent Lejeune#include "R600MachineFunctionInfo.h"
25141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune#include "R600RegisterInfo.h"
265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIDefines.h"
275c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIMachineFunctionInfo.h"
285c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "SIRegisterInfo.h"
29bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCContext.h"
30bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/MC/MCSectionELF.h"
31f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/MC/MCStreamer.h"
32bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard#include "llvm/Support/ELF.h"
33e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard#include "llvm/Support/MathExtras.h"
34f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/Support/TargetRegistry.h"
3558a2cbef4aac9ee7d530dfb690c78d6fc11a2371Chandler Carruth#include "llvm/Target/TargetLoweringObjectFile.h"
36f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
37f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm;
38f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
39cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// TODO: This should get the default rounding mode from the kernel. We just set
40cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// the default here, but this could change if the OpenCL rounding mode pragmas
41cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// are used.
42cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//
43cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// The denormal mode here should match what is reported by the OpenCL runtime
44cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
45cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// can also be override to flush with the -cl-denorms-are-zero compiler flag.
46cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//
47cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
48cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// precision, and leaves single precision to flush all and does not report
49cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
50cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// CL_FP_DENORM for both.
51cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesstatic uint32_t getFPMode(MachineFunction &) {
52cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
53cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
54cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
55cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
56cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines}
57f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
58f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardstatic AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
59f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                              MCStreamer &Streamer) {
60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return new AMDGPUAsmPrinter(tm, Streamer);
61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
62f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
63f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardextern "C" void LLVMInitializeR600AsmPrinter() {
64f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
65f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
66f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
67f9318673178309288f9320efe02d529419ac32a2Tom StellardAMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
6836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    : AsmPrinter(TM, Streamer) {
6936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode();
70f9318673178309288f9320efe02d529419ac32a2Tom Stellard}
71f9318673178309288f9320efe02d529419ac32a2Tom Stellard
72f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardbool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
73f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  SetupMachineFunction(MF);
7436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
7536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  OutStreamer.emitRawComment(Twine('@') + MF.getName() + Twine(':'));
76141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
77f9318673178309288f9320efe02d529419ac32a2Tom Stellard  MCContext &Context = getObjFileLowering().getContext();
78f9318673178309288f9320efe02d529419ac32a2Tom Stellard  const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
7987cba4a4c1d5b8b026c83b0916b37255600ecd5fTom Stellard                                              ELF::SHT_PROGBITS, 0,
80141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                              SectionKind::getReadOnly());
81141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  OutStreamer.SwitchSection(ConfigSection);
8236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
83f9318673178309288f9320efe02d529419ac32a2Tom Stellard  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
8436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SIProgramInfo KernelInfo;
853ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard  if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    getSIProgramInfo(KernelInfo, MF);
8736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    EmitProgramInfoSI(MF, KernelInfo);
88141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  } else {
89141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    EmitProgramInfoR600(MF);
90f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
91f9318673178309288f9320efe02d529419ac32a2Tom Stellard
92f9318673178309288f9320efe02d529419ac32a2Tom Stellard  DisasmLines.clear();
93f9318673178309288f9320efe02d529419ac32a2Tom Stellard  HexLines.clear();
94f9318673178309288f9320efe02d529419ac32a2Tom Stellard  DisasmLineMaxLen = 0;
95f9318673178309288f9320efe02d529419ac32a2Tom Stellard
96bf1efe642111043eeb7ccaf3da759f4d2d1e4647Tom Stellard  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
97f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  EmitFunctionBody();
98f9318673178309288f9320efe02d529419ac32a2Tom Stellard
9936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (isVerbose()) {
10036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    const MCSectionELF *CommentSection
10136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      = Context.getELFSection(".AMDGPU.csdata",
10236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              ELF::SHT_PROGBITS, 0,
10336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              SectionKind::getReadOnly());
10436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    OutStreamer.SwitchSection(CommentSection);
10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
106dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" Kernel info:", false);
108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                 false);
11036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
11136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 false);
11236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
11336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 false);
114cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
115cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                 false);
116cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
117cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                 false);
11836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else {
11936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
12036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      OutStreamer.emitRawComment(
12136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
12236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
12336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
12436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
125f9318673178309288f9320efe02d529419ac32a2Tom Stellard  if (STM.dumpCode()) {
126f9318673178309288f9320efe02d529419ac32a2Tom Stellard#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
127f9318673178309288f9320efe02d529419ac32a2Tom Stellard    MF.dump();
128f9318673178309288f9320efe02d529419ac32a2Tom Stellard#endif
129f9318673178309288f9320efe02d529419ac32a2Tom Stellard
130f9318673178309288f9320efe02d529419ac32a2Tom Stellard    if (DisasmEnabled) {
131f9318673178309288f9320efe02d529419ac32a2Tom Stellard      OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
132f9318673178309288f9320efe02d529419ac32a2Tom Stellard                                                  ELF::SHT_NOTE, 0,
133f9318673178309288f9320efe02d529419ac32a2Tom Stellard                                                  SectionKind::getReadOnly()));
134f9318673178309288f9320efe02d529419ac32a2Tom Stellard
135f9318673178309288f9320efe02d529419ac32a2Tom Stellard      for (size_t i = 0; i < DisasmLines.size(); ++i) {
136f9318673178309288f9320efe02d529419ac32a2Tom Stellard        std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
137f9318673178309288f9320efe02d529419ac32a2Tom Stellard        Comment += " ; " + HexLines[i] + "\n";
138f9318673178309288f9320efe02d529419ac32a2Tom Stellard
139f9318673178309288f9320efe02d529419ac32a2Tom Stellard        OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
140f9318673178309288f9320efe02d529419ac32a2Tom Stellard        OutStreamer.EmitBytes(StringRef(Comment));
141f9318673178309288f9320efe02d529419ac32a2Tom Stellard      }
142f9318673178309288f9320efe02d529419ac32a2Tom Stellard    }
143f9318673178309288f9320efe02d529419ac32a2Tom Stellard  }
144f9318673178309288f9320efe02d529419ac32a2Tom Stellard
145f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return false;
146f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
147f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
148141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeunevoid AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
149141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  unsigned MaxGPR = 0;
15086cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune  bool killPixel = false;
151141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  const R600RegisterInfo * RI =
152141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
1532a74639bc7713146b1182328892807c421c84265Vincent Lejeune  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
154f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
155141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
156141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
157141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                                  BB != BB_E; ++BB) {
158141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    MachineBasicBlock &MBB = *BB;
159141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
160141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune                                                    I != E; ++I) {
161141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      MachineInstr &MI = *I;
16286cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune      if (MI.getOpcode() == AMDGPU::KILLGT)
16386cdb704174828b8e91e94132a19634e3c11d87dVincent Lejeune        killPixel = true;
164141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      unsigned numOperands = MI.getNumOperands();
165141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
166141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        MachineOperand & MO = MI.getOperand(op_idx);
167141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        if (!MO.isReg())
168141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune          continue;
169141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
170141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
171141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        // Register with value > 127 aren't GPR
172141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        if (HWReg > 127)
173141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune          continue;
174141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune        MaxGPR = std::max(MaxGPR, HWReg);
175141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune      }
176141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune    }
177141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune  }
178f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard
179f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  unsigned RsrcReg;
1803ff0abfaabc2c7f604d490be587b9c27e7c91ac0Tom Stellard  if (STM.getGeneration() >= AMDGPUSubtarget::EVERGREEN) {
181f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    // Evergreen / Northern Islands
182f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    switch (MFI->ShaderType) {
183f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    default: // Fall through
184f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
185f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
186f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
187f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
188f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    }
189f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  } else {
190f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    // R600 / R700
191f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    switch (MFI->ShaderType) {
192f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    default: // Fall through
193f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::GEOMETRY: // Fall through
194f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::COMPUTE:  // Fall through
195f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
196f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
197f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard    }
198f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  }
199f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard
200f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(RsrcReg, 4);
201f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
202f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard                           S_STACK_SIZE(MFI->StackSize), 4);
203f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
204f07b5373d7493d29cd758ababf135c2d0d8da127Tom Stellard  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
205e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard
206e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard  if (MFI->ShaderType == ShaderType::COMPUTE) {
207e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard    OutStreamer.EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
208e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard    OutStreamer.EmitIntValue(RoundUpToAlignment(MFI->LDSSize, 4) >> 2, 4);
209e3d4cbc7d25061441adafa47450a31571c87bf85Tom Stellard  }
210141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune}
211141ca7fc6488bfb20ad59854cc12039e16688ed3Vincent Lejeune
212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                        MachineFunction &MF) const {
214dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  uint64_t CodeSize = 0;
215f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  unsigned MaxSGPR = 0;
216f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  unsigned MaxVGPR = 0;
217f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  bool VCCUsed = false;
218f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  const SIRegisterInfo * RI =
219f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
220f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
221f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                                  BB != BB_E; ++BB) {
223f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    MachineBasicBlock &MBB = *BB;
224f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
225f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                                    I != E; ++I) {
226f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      MachineInstr &MI = *I;
227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // TODO: CodeSize should account for multiple functions.
229dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      CodeSize += MI.getDesc().Size;
230dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
231f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      unsigned numOperands = MI.getNumOperands();
232f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
23329a651af8a4b522daf1f9102c93e4c8ecc2ef3c2Matt Arsenault        MachineOperand &MO = MI.getOperand(op_idx);
234f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        unsigned width = 0;
235f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        bool isSGPR = false;
23636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
237f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (!MO.isReg()) {
238f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
239f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
24036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned reg = MO.getReg();
24136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
24236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines	    reg == AMDGPU::VCC_HI) {
243f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          VCCUsed = true;
244f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
245f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
2468305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault
247f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        switch (reg) {
248f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        default: break;
2498305cae0045f804b7cca7c69f83f20ad847817bdMatt Arsenault        case AMDGPU::SCC:
250f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        case AMDGPU::EXEC:
251f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        case AMDGPU::M0:
252f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          continue;
253f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
254f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
255f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (AMDGPU::SReg_32RegClass.contains(reg)) {
256f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
257f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 1;
258f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
259f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
260f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 1;
261f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
262f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
263f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 2;
264f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
265f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
266f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 2;
2674d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
2684d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig          isSGPR = false;
2694d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig          width = 3;
270f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
271f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
272f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 4;
273f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
274f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = false;
275f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 4;
276f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
277f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          isSGPR = true;
278f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          width = 8;
27936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard        } else if (AMDGPU::VReg_256RegClass.contains(reg)) {
28036ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          isSGPR = false;
28136ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          width = 8;
282f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard        } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
283f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard          isSGPR = true;
284f9e5c398119a77202dc0f7861f5131a7b9f7b8b3Tom Stellard          width = 16;
28536ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard        } else if (AMDGPU::VReg_512RegClass.contains(reg)) {
28636ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          isSGPR = false;
28736ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard          width = 16;
288f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else {
28936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          llvm_unreachable("Unknown register class");
290f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
29136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned hwReg = RI->getEncodingValue(reg) & 0xff;
29236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        unsigned maxUsed = hwReg + width - 1;
293f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        if (isSGPR) {
294f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
295f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        } else {
296f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
297f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        }
298f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      }
299f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    }
300f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
30136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
30236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (VCCUsed)
303f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    MaxSGPR += 2;
30436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ProgInfo.NumVGPR = MaxVGPR;
306cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ProgInfo.NumSGPR = MaxSGPR;
307cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
308cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
309cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // register.
310cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ProgInfo.FloatMode = getFPMode(MF);
311cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
312cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // XXX: Not quite sure what this does, but sc seems to unset this.
313cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ProgInfo.IEEEMode = 0;
314cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
315cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Do not clamp NAN to 0.
316cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ProgInfo.DX10Clamp = 0;
317cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
318cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ProgInfo.CodeLen = CodeSize;
31936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
32036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
32136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
32236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                         const SIProgramInfo &KernelInfo) {
32336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
32436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
325cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
3269a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  unsigned RsrcReg;
3279a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  switch (MFI->ShaderType) {
3289a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  default: // Fall through
3299a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
3309a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
3319a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
3329a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
3339a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  }
3349a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard
33554328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  unsigned LDSAlignShift;
33654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
337cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // LDS is allocated in 64 dword blocks.
33854328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    LDSAlignShift = 8;
33954328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  } else {
340cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // LDS is allocated in 128 dword blocks.
34154328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    LDSAlignShift = 9;
34254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  }
343cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
34454328c772c5519e56c13667c2b1d1e830580c44dTom Stellard  unsigned LDSBlocks =
345cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
34654328c772c5519e56c13667c2b1d1e830580c44dTom Stellard
347a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  if (MFI->ShaderType == ShaderType::COMPUTE) {
348cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
349cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
350cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    const uint32_t ComputePGMRSrc1 =
351cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
352cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
353cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_PRIORITY(KernelInfo.Priority) |
354cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
355cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_PRIV(KernelInfo.Priv) |
356cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
357cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
358cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
359cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
360cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
361cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
362a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer    OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
36354328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
364cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  } else {
365cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    OutStreamer.EmitIntValue(RsrcReg, 4);
366cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
367cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                             S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
368a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  }
369cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
3709a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  if (MFI->ShaderType == ShaderType::PIXEL) {
371a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer    OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
37254328c772c5519e56c13667c2b1d1e830580c44dTom Stellard    OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
3739a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
3749a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
3759a256300f8f61937f5f7a148b9cb09936d103a97Tom Stellard  }
376f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
377