//===-- SIMachineFunctionInfo.cpp -------- SI Machine Function Info -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

using namespace llvm;

static cl::opt<bool> EnableSpillSGPRToVGPR(
  "amdgpu-spill-sgpr-to-vgpr",
  cl::desc("Enable spilling SGPRs to VGPRs"),
  cl::ReallyHidden,
  cl::init(true));

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    MaximumWorkGroupSize(0),
    DebuggerReservedVGPRCount(0),
    DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
    DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    HasNonSpillStackObjects(false),
    HasFlatInstructions(false),
    NumSpilledSGPRs(0),
    NumSpilledVGPRs(0),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

  if (!AMDGPU::isShader(F->getCallingConv())) {
    KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  }

  if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
    WorkItemIDZ = true;

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(*F);
  bool HasStackObjects = FrameInfo->hasStackObjects();

  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F->hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;
  }

  // We don't need to worry about accessing spills with flat instructions.
  // TODO: On VI where we must use flat for global, we should be able to omit
  // this if it is never used for generic access.
  if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
      ST.isAmdHsaOS())
    FlatScratchInit = true;

  if (AMDGPU::isCompute(F->getCallingConv()))
    MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
  else
    MaximumWorkGroupSize = ST.getWavefrontSize();

  if (ST.debuggerReserveRegs())
    DebuggerReservedVGPRCount = 4;
}

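// The add* helpers below claim the next unused user SGPR(s) for a preloaded
// kernel input, bump NumUserSGPRs by the number of registers the value
// occupies, and return the register that will hold it.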
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return FlatScratchInitUserSGPR;
}

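// Map one 4-byte slice of a spilled SGPR (FrameIndex plus SubIdx) onto a lane
// of a reserved VGPR. A wavefront has 64 lanes, so each lane VGPR holds up to
// 64 spilled dwords: byte offset Offset lands in lane VGPR Offset / (64 * 4)
// at lane (Offset / 4) % 64.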
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
  MachineFunction *MF,
  unsigned FrameIndex,
  unsigned SubIdx) {
  if (!EnableSpillSGPRToVGPR)
    return SpilledReg();

  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  struct SpilledReg Spill;
  Spill.Lane = Lane;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister)
      // We have no VGPRs left for spilling SGPRs.
      return Spill;

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid the machine verifier
    // complaining about use of an undefined physical register.
    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
         BI != BE; ++BI) {
      BI->addLiveIn(LaneVGPR);
    }
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
  const MachineFunction &MF) const {
  return MaximumWorkGroupSize;
}