1//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8/// \file
9//===----------------------------------------------------------------------===//
10
11
12#include "SIMachineFunctionInfo.h"
13#include "AMDGPUSubtarget.h"
14#include "SIInstrInfo.h"
15#include "llvm/CodeGen/MachineInstrBuilder.h"
16#include "llvm/CodeGen/MachineFrameInfo.h"
17#include "llvm/CodeGen/MachineRegisterInfo.h"
18#include "llvm/IR/Function.h"
19#include "llvm/IR/LLVMContext.h"
20
21#define MAX_LANES 64
22
23using namespace llvm;
24
25
26// Pin the vtable to this file.
27void SIMachineFunctionInfo::anchor() {}
28
29SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30  : AMDGPUMachineFunction(MF),
31    TIDReg(AMDGPU::NoRegister),
32    ScratchRSrcReg(AMDGPU::NoRegister),
33    ScratchWaveOffsetReg(AMDGPU::NoRegister),
34    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35    DispatchPtrUserSGPR(AMDGPU::NoRegister),
36    QueuePtrUserSGPR(AMDGPU::NoRegister),
37    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38    DispatchIDUserSGPR(AMDGPU::NoRegister),
39    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
44    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
49    LDSWaveSpillSize(0),
50    PSInputAddr(0),
51    NumUserSGPRs(0),
52    NumSystemSGPRs(0),
53    HasSpilledSGPRs(false),
54    HasSpilledVGPRs(false),
55    PrivateSegmentBuffer(false),
56    DispatchPtr(false),
57    QueuePtr(false),
58    DispatchID(false),
59    KernargSegmentPtr(false),
60    FlatScratchInit(false),
61    GridWorkgroupCountX(false),
62    GridWorkgroupCountY(false),
63    GridWorkgroupCountZ(false),
64    WorkGroupIDX(true),
65    WorkGroupIDY(false),
66    WorkGroupIDZ(false),
67    WorkGroupInfo(false),
68    PrivateSegmentWaveByteOffset(false),
69    WorkItemIDX(true),
70    WorkItemIDY(false),
71    WorkItemIDZ(false) {
72  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
73  const Function *F = MF.getFunction();
74
75  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
76
77  if (getShaderType() == ShaderType::COMPUTE)
78    KernargSegmentPtr = true;
79
80  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
81    WorkGroupIDY = true;
82
83  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
84    WorkGroupIDZ = true;
85
86  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
87    WorkItemIDY = true;
88
89  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
90    WorkItemIDZ = true;
91
92  bool MaySpill = ST.isVGPRSpillingEnabled(this);
93  bool HasStackObjects = FrameInfo->hasStackObjects();
94
95  if (HasStackObjects || MaySpill)
96    PrivateSegmentWaveByteOffset = true;
97
98  if (ST.isAmdHsaOS()) {
99    if (HasStackObjects || MaySpill)
100      PrivateSegmentBuffer = true;
101
102    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
103      DispatchPtr = true;
104  }
105
106  // X, XY, and XYZ are the only supported combinations, so make sure Y is
107  // enabled if Z is.
108  if (WorkItemIDZ)
109    WorkItemIDY = true;
110}
111
112unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
113  const SIRegisterInfo &TRI) {
114  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
115    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
116  NumUserSGPRs += 4;
117  return PrivateSegmentBufferUserSGPR;
118}
119
120unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
121  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
122    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
123  NumUserSGPRs += 2;
124  return DispatchPtrUserSGPR;
125}
126
127unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
128  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
129    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
130  NumUserSGPRs += 2;
131  return QueuePtrUserSGPR;
132}
133
134unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
135  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
136    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
137  NumUserSGPRs += 2;
138  return KernargSegmentPtrUserSGPR;
139}
140
141SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
142                                                       MachineFunction *MF,
143                                                       unsigned FrameIndex,
144                                                       unsigned SubIdx) {
145  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
146  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
147      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
148  MachineRegisterInfo &MRI = MF->getRegInfo();
149  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
150  Offset += SubIdx * 4;
151
152  unsigned LaneVGPRIdx = Offset / (64 * 4);
153  unsigned Lane = (Offset / 4) % 64;
154
155  struct SpilledReg Spill;
156
157  if (!LaneVGPRs.count(LaneVGPRIdx)) {
158    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
159    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
160
161    // Add this register as live-in to all blocks to avoid machine verifer
162    // complaining about use of an undefined physical register.
163    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
164         BI != BE; ++BI) {
165      BI->addLiveIn(LaneVGPR);
166    }
167  }
168
169  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
170  Spill.Lane = Lane;
171  return Spill;
172}
173
174unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
175                                              const MachineFunction &MF) const {
176  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
177  // FIXME: We should get this information from kernel attributes if it
178  // is available.
179  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
180}
181