//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    MaximumWorkGroupSize(0),
    DebuggerReservedVGPRCount(0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    HasFlatInstructions(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI, where we must use flat for global access, we should be able
  // to omit this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}

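// Reserve the next four user SGPRs for the private segment buffer (the
// scratch resource descriptor) and return the SReg_128 super-register that
// covers them.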
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

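// The dispatch pointer, queue pointer, kernarg segment pointer, and flat
// scratch init value are each 64 bits wide, so each helper below reserves two
// consecutive user SGPRs and returns the covering SReg_64 super-register.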
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

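// Map an SGPR spill slot (a frame index plus a 32-bit subregister index) to a
// lane of a VGPR reserved for SGPR spilling. Returns a SpilledReg without a
// VGPR assigned if SGPR-to-VGPR spilling is disabled or no unused VGPR is
// available.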
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                       MachineFunction *MF,
                                                       unsigned FrameIndex,
                                                       unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

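  // Each spill VGPR provides MAX_LANES (64) lanes of 4 bytes each, so the
  // byte offset selects both which spill VGPR to use and the lane within it.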
  unsigned LaneVGPRIdx = Offset / (MAX_LANES * 4);
  unsigned Lane = (Offset / 4) % MAX_LANES;

  SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid the machine
    // verifier complaining about use of an undefined physical register.
    for (MachineBasicBlock &MBB : *MF)
      MBB.addLiveIn(LaneVGPR);
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
                                              const MachineFunction &MF) const {
  return MaximumWorkGroupSize;
}