/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesBufferSparseResidency.cpp
 * \brief Sparse partially resident buffers tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <sstream>
#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

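// Size in bytes of one 32-bit uint in the shader's std430 storage buffers; used to derive
// the shader's loop iteration count from the buffer size.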
enum ShaderParameters
{
	SIZE_OF_UINT_IN_SHADER = 4u,
};

class BufferSparseResidencyCase : public TestCase
{
public:
					BufferSparseResidencyCase	(tcu::TestContext&		testCtx,
												 const std::string&		name,
												 const std::string&		description,
												 const deUint32			bufferSize,
												 const glu::GLSLVersion	glslVersion,
												 const bool				useDeviceGroups);

	void			initPrograms				(SourceCollections&		sourceCollections) const;
	TestInstance*	createInstance				(Context&				context) const;

private:
	const deUint32			m_bufferSize;
	const glu::GLSLVersion	m_glslVersion;
	const bool				m_useDeviceGroups;
};

BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext&			testCtx,
													  const std::string&		name,
													  const std::string&		description,
													  const deUint32			bufferSize,
													  const glu::GLSLVersion	glslVersion,
													  const bool				useDeviceGroups)
	: TestCase			(testCtx, name, description)
	, m_bufferSize		(bufferSize)
	, m_glslVersion		(glslVersion)
	, m_useDeviceGroups	(useDeviceGroups)
{
}

void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
{
	const char* const	versionDecl		= glu::getGLSLVersionDeclaration(m_glslVersion);
	const deUint32		iterationsCount	= m_bufferSize / SIZE_OF_UINT_IN_SHADER;
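	// A single shader invocation copies the whole buffer, one 32-bit word per loop iteration.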

	std::ostringstream src;

	src << versionDecl << "\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
		<< "{\n"
		<< "	uint data[];\n"
		<< "} sb_in;\n"
		<< "\n"
		<< "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
		<< "{\n"
		<< "	uint result[];\n"
		<< "} sb_out;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	for (int i = 0; i < " << iterationsCount << "; ++i)\n"
		<< "	{\n"
		<< "		sb_out.result[i] = sb_in.data[i];\n"
		<< "	}\n"
		<< "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
					BufferSparseResidencyInstance	(Context&			context,
													 const deUint32		bufferSize,
													 const bool			useDeviceGroups);

	tcu::TestStatus	iterate							(void);

private:
	const deUint32	m_bufferSize;
	const bool		m_useDeviceGroups;
};

BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context&			context,
															  const deUint32	bufferSize,
															  const bool		useDeviceGroups)
	: SparseResourcesBaseInstance	(context)
	, m_bufferSize					(bufferSize)
	, m_useDeviceGroups				(useDeviceGroups)
{
}

tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
{
	const InstanceInterface&		 instance					= m_context.getInstanceInterface();
	{
		// Create logical device supporting both sparse and compute operations
		QueueRequirementsVec queueRequirements;
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

		createDeviceSupportingQueues(queueRequirements);
	}
	const VkPhysicalDevice			 physicalDevice				= getPhysicalDevice();
	const VkPhysicalDeviceProperties physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);

	if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
		TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");

	const DeviceInterface&	deviceInterface	= getDeviceInterface();
	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);

	// Go through all physical devices
	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
	{
		const deUint32	firstDeviceID	= physDevID;
		const deUint32	secondDeviceID	= (firstDeviceID + 1) % m_numPhysicalDevices;
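		// With device groups, the buffer owned by firstDeviceID is backed by memory allocated
		// on secondDeviceID (see VkDeviceGroupBindSparseInfo below).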

		VkBufferCreateInfo bufferCreateInfo =
		{
			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
			DE_NULL,								// const void*			pNext;
			VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
			VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT,	// VkBufferCreateFlags	flags;
			m_bufferSize,							// VkDeviceSize			size;
			VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
			VK_BUFFER_USAGE_TRANSFER_SRC_BIT,		// VkBufferUsageFlags	usage;
			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
			0u,										// deUint32				queueFamilyIndexCount;
			DE_NULL									// const deUint32*		pQueueFamilyIndices;
		};

		const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };

		if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
		{
			bufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
			bufferCreateInfo.queueFamilyIndexCount	= 2u;
			bufferCreateInfo.pQueueFamilyIndices	= queueFamilyIndices;
		}

		// Create sparse buffer
		const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));

		// Create sparse buffer memory bind semaphore
		const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

		const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);

		if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
			TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");

		DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);

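		// The buffer is managed in slots of one alignment unit each; the assert above guarantees
		// the division below is exact.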
		const deUint32				numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
		std::vector<DeviceMemorySp>	deviceMemUniquePtrVec;

		{
			std::vector<VkSparseMemoryBind>		sparseMemoryBinds;
			const deUint32						memoryType		= findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);

			if (memoryType == NO_MATCH_FOUND)
				return tcu::TestStatus::fail("No matching memory type found");

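			// Bind memory to every second slot only; the odd-numbered slots are left unbound,
			// making the buffer just partially resident.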
			for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
			{
				const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);

				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

				sparseMemoryBinds.push_back(sparseMemoryBind);
			}

			const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);

			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
			{
				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,	//VkStructureType							sType;
				DE_NULL,												//const void*								pNext;
				firstDeviceID,											//deUint32									resourceDeviceIndex;
				secondDeviceID											//deUint32									memoryDeviceIndex;
			};
			const VkBindSparseInfo bindSparseInfo =
			{
				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
				0u,														//deUint32									waitSemaphoreCount;
				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
				1u,														//deUint32									bufferBindCount;
				&sparseBufferBindInfo,									//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
				0u,														//deUint32									imageOpaqueBindCount;
				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
				0u,														//deUint32									imageBindCount;
				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
				1u,														//deUint32									signalSemaphoreCount;
				&bufferMemoryBindSemaphore.get()						//const VkSemaphore*						pSignalSemaphores;
			};

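			// Submit the bind operation on the sparse queue; bufferMemoryBindSemaphore is signaled
			// once the binding completes.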
			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
		}

		// Create input buffer
		const VkBufferCreateInfo		inputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
		const Unique<VkBuffer>			inputBuffer				(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
		const de::UniquePtr<Allocation>	inputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));

		std::vector<deUint8> referenceData;
		referenceData.resize(m_bufferSize);

		for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
		{
			referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
		}

		deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);

		flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);

		// Create output buffer
		const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
		const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

		// Create command pool and command buffer for compute and data transfer operations
		const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
		const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

		// Start recording compute and transfer commands
		beginCommandBuffer(deviceInterface, *commandBuffer);

		// Create descriptor set layout
		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
			DescriptorSetLayoutBuilder()
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, getDevice()));

		// Create compute pipeline
		const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
		const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		const Unique<VkDescriptorPool> descriptorPool(
			DescriptorPoolBuilder()
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

		const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

		{
			const VkDescriptorBufferInfo inputBufferInfo	= makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
			const VkDescriptorBufferInfo sparseBufferInfo	= makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);

			DescriptorSetUpdateBuilder()
				.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
				.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
				.update(deviceInterface, getDevice());
		}

		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

		{
			const VkBufferMemoryBarrier inputBufferBarrier
				= makeBufferMemoryBarrier(	VK_ACCESS_HOST_WRITE_BIT,
											VK_ACCESS_SHADER_READ_BIT,
											*inputBuffer,
											0ull,
											m_bufferSize);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
		}

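		// A single workgroup is dispatched; the shader itself loops over every uint in the buffer.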
		deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);

		{
			const VkBufferMemoryBarrier sparseBufferBarrier
				= makeBufferMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
											VK_ACCESS_TRANSFER_READ_BIT,
											*sparseBuffer,
											0ull,
											m_bufferSize);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
		}

		{
			const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);

			deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
		}

		{
			const VkBufferMemoryBarrier outputBufferBarrier
				= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
											VK_ACCESS_HOST_READ_BIT,
											*outputBuffer,
											0ull,
											m_bufferSize);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
		}

		// End recording compute and transfer commands
		endCommandBuffer(deviceInterface, *commandBuffer);

		// The submission must wait on the sparse binding semaphore before the compute shader touches the buffer
		const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };

		// Submit compute and transfer commands for execution and wait for completion
		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(),
			waitStageBits, 0, DE_NULL, m_useDeviceGroups, firstDeviceID);

		// Retrieve data from output buffer to host memory
		invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize);

		const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());

		// Wait for sparse queue to become idle
		VK_CHECK(deviceInterface.queueWaitIdle(sparseQueue.queueHandle));

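		// Even-numbered slots were bound to memory, so they must contain the data written by the shader.
		// Odd-numbered slots were left unbound: if residencyNonResidentStrict is set, reads from them
		// must return zeros; otherwise their contents are undefined and are not checked.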
		// Compare output data with reference data
		for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
		{
			const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
			const deUint32 offset	 = alignment * sparseBindNdx;
			const deUint32 size		 = (sparseBindNdx == numSparseSlots - 1u && m_bufferSize % alignment != 0u) ? m_bufferSize % alignment : alignment;

			if (sparseBindNdx % 2u == 0u)
			{
				if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
					return tcu::TestStatus::fail("Failed");
			}
			else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
			{
				deMemset(&referenceData[offset], 0u, size);

				if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
					return tcu::TestStatus::fail("Failed");
			}
		}
	}

	return tcu::TestStatus::pass("Passed");
}

TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
{
	return new BufferSparseResidencyInstance(context, m_bufferSize, m_useDeviceGroups);
}

} // anonymous

void addBufferSparseResidencyTests (tcu::TestCaseGroup* group, const bool useDeviceGroups)
{
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440, useDeviceGroups));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440, useDeviceGroups));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440, useDeviceGroups));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440, useDeviceGroups));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440, useDeviceGroups));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440, useDeviceGroups));
}

} // sparse
} // vkt