/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Test Case Skeleton Based on Compute Shaders
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmComputeShaderCase.hpp"

#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"

namespace
{

using namespace vk;
using std::vector;

typedef vkt::SpirVAssembly::AllocationMp			AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp			AllocationSp;

typedef Unique<VkBuffer>							BufferHandleUp;
typedef de::SharedPtr<BufferHandleUp>				BufferHandleSp;

/*--------------------------------------------------------------------*//*!
 * \brief Create a buffer, allocate and bind memory for it
 *
 * The buffer is created as a storage or uniform buffer according to the
 * given descriptor type. The memory is allocated as host visible and
 * passed back as a vk::Allocation instance via outMemory.
 *//*--------------------------------------------------------------------*/
Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, size_t numBytes, AllocationMp* outMemory)
{
	VkBufferUsageFlags			usageBit		= (VkBufferUsageFlags)0;

	switch (dtype)
	{
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:	usageBit = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:	usageBit = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; break;
		default:								DE_ASSERT(false);
	}

	const VkBufferCreateInfo bufferCreateInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// sType
		DE_NULL,								// pNext
		0u,										// flags
		numBytes,								// size
		usageBit,								// usage
		VK_SHARING_MODE_EXCLUSIVE,				// sharingMode
		0u,										// queueFamilyIndexCount
		DE_NULL,								// pQueueFamilyIndices
	};

	Move<VkBuffer>				buffer			(createBuffer(vkdi, device, &bufferCreateInfo));
	const VkMemoryRequirements	requirements	= getBufferMemoryRequirements(vkdi, device, *buffer);
	AllocationMp				bufferMemory	= allocator.allocate(requirements, MemoryRequirement::HostVisible);

	VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
	*outMemory = bufferMemory;

	return buffer;
}
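
// Minimal usage sketch for createBufferAndBindMemory() above (illustrative
// only; the 256-byte size and variable names are made up for the example):
//
//	AllocationMp	alloc;
//	Move<VkBuffer>	buffer	= createBufferAndBindMemory(vkdi, device,
//		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, 256u, &alloc);
//
// The allocation must stay alive for as long as the buffer is in use.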

void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemcpy((deUint8*)hostPtr, data, numBytes);
	flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
}

void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemset((deUint8*)hostPtr, value, numBytes);
	flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
}

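// Mirror of the flushes in setMemory()/fillMemoryWithValue() above: for
// memory types without VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, a mapped range
// must be invalidated before the host reads back data written by the device.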
void invalidateMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* srcAlloc, size_t numBytes)
{
	invalidateMappedMemoryRange(vkdi, device, srcAlloc->getMemory(), srcAlloc->getOffset(), numBytes);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set layout with the given descriptor types
 *
 * All bindings are made visible to the compute stage only.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorSetLayoutBuilder builder;

	for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
		builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);

	return builder.build(vkdi, device);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a pipeline layout with one descriptor set
 *//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorSetLayout descriptorSetLayout, const vkt::SpirVAssembly::BufferSp& pushConstants)
{
	VkPipelineLayoutCreateInfo		createInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,										// pNext
		(VkPipelineLayoutCreateFlags)0,					// flags
		1u,												// setLayoutCount
		&descriptorSetLayout,							// pSetLayouts
		0u,												// pushConstantRangeCount
		DE_NULL,										// pPushConstantRanges
	};

	VkPushConstantRange				range		=
	{
		VK_SHADER_STAGE_COMPUTE_BIT,					// stageFlags
		0,												// offset
		0,												// size
	};

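	// Attach the range only when the test actually supplies push constants:
	// zero-sized push constant ranges are not valid in Vulkan, and the range
	// must cover the whole blob later handed to vkCmdPushConstants().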
	if (pushConstants != DE_NULL)
	{
		vector<deUint8> pushConstantsBytes;
		pushConstants->getBytes(pushConstantsBytes);

		range.size							= static_cast<deUint32>(pushConstantsBytes.size());
		createInfo.pushConstantRangeCount	= 1;
		createInfo.pPushConstantRanges		= &range;
	}

	return createPipelineLayout(vkdi, device, &createInfo);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a one-time descriptor pool for one descriptor set that
 * supports the given descriptor types.
 *//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorPoolBuilder builder;

	for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
		builder.addType(dtypes[typeNdx], 1);

	return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set
 *
 * The descriptor set's layout contains the given descriptor types,
 * sequentially bound to binding points starting from 0.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorPool pool, VkDescriptorSetLayout layout, const vector<VkDescriptorType>& dtypes, const vector<VkDescriptorBufferInfo>& descriptorInfos)
{
	DE_ASSERT(dtypes.size() == descriptorInfos.size());

	const VkDescriptorSetAllocateInfo	allocInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// sType
		DE_NULL,										// pNext
		pool,											// descriptorPool
		1u,												// descriptorSetCount
		&layout											// pSetLayouts
	};

	Move<VkDescriptorSet>				descriptorSet	= allocateDescriptorSet(vkdi, device, &allocInfo);
	DescriptorSetUpdateBuilder			builder;

	for (deUint32 descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
		builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorInfos[descriptorNdx]);
	builder.update(vkdi, device);

	return descriptorSet;
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a compute pipeline based on the given shader
 *//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline (const DeviceInterface& vkdi, const VkDevice& device, VkPipelineLayout pipelineLayout, VkShaderModule shader, const char* entryPoint, const vector<deUint32>& specConstants)
{
	const deUint32							numSpecConstants				= (deUint32)specConstants.size();
	vector<VkSpecializationMapEntry>		entries;
	VkSpecializationInfo					specInfo;

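	// Every specialization constant is treated as a 32-bit value here:
	// constant ID ndx maps to bytes [4*ndx, 4*ndx + 4) of the data blob
	// built below.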
	if (numSpecConstants != 0)
	{
		entries.resize(numSpecConstants);

		for (deUint32 ndx = 0; ndx < numSpecConstants; ++ndx)
		{
			entries[ndx].constantID	= ndx;
			entries[ndx].offset		= ndx * (deUint32)sizeof(deUint32);
			entries[ndx].size		= sizeof(deUint32);
		}

		specInfo.mapEntryCount		= numSpecConstants;
		specInfo.pMapEntries		= &entries[0];
		specInfo.dataSize			= numSpecConstants * sizeof(deUint32);
		specInfo.pData				= specConstants.data();
	}

	const VkPipelineShaderStageCreateInfo	pipelineShaderStageCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// sType
		DE_NULL,												// pNext
		(VkPipelineShaderStageCreateFlags)0,					// flags
		VK_SHADER_STAGE_COMPUTE_BIT,							// stage
		shader,													// module
		entryPoint,												// pName
		(numSpecConstants == 0) ? DE_NULL : &specInfo,			// pSpecializationInfo
	};
	const VkComputePipelineCreateInfo		pipelineCreateInfo				=
	{
		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,			// sType
		DE_NULL,												// pNext
		(VkPipelineCreateFlags)0,								// flags
		pipelineShaderStageCreateInfo,							// stage
		pipelineLayout,											// layout
		(VkPipeline)0,											// basePipelineHandle
		0u,														// basePipelineIndex
	};

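	// No pipeline cache is used (VK_NULL_HANDLE), so pipelines are created
	// without an application-managed cache object.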
	return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a command pool
 *
 * The created command pool is designated for use on the queue family
 * identified by the given queueFamilyIndex. No create flags are set, so
 * command buffers allocated from it cannot be individually reset.
 *//*--------------------------------------------------------------------*/
Move<VkCommandPool> createCommandPool (const DeviceInterface& vkdi, VkDevice device, deUint32 queueFamilyIndex)
{
	return createCommandPool(vkdi, device, 0u, queueFamilyIndex);
}

} // anonymous

namespace vkt
{
namespace SpirVAssembly
{

// ComputeShaderTestCase implementations

SpvAsmComputeShaderCase::SpvAsmComputeShaderCase (tcu::TestContext& testCtx, const char* name, const char* description, const ComputeShaderSpec& spec, const ComputeTestFeatures features)
	: TestCase		(testCtx, name, description)
	, m_shaderSpec	(spec)
	, m_features	(features)
{
}

void SpvAsmComputeShaderCase::initPrograms (SourceCollections& programCollection) const
{
	programCollection.spirvAsmSources.add("compute") << m_shaderSpec.assembly.c_str() << SpirVAsmBuildOptions(m_shaderSpec.spirvVersion);
}

TestInstance* SpvAsmComputeShaderCase::createInstance (Context& ctx) const
{
	if (getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion) > ctx.getUsedApiVersion())
	{
		TCU_THROW(NotSupportedError, std::string("Vulkan " + getVulkanName(getMinRequiredVulkanVersion(m_shaderSpec.spirvVersion)) + " or higher is required for this test to run").c_str());
	}
	return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec, m_features);
}

// ComputeShaderTestInstance implementations

SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec, const ComputeTestFeatures features)
	: TestInstance		(ctx)
	, m_shaderSpec		(spec)
	, m_features		(features)
{
}

tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
{
	const VkPhysicalDeviceFeatures&		features			= m_context.getDeviceFeatures();

	if ((m_features == COMPUTE_TEST_USES_INT16 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt16)
	{
		TCU_THROW(NotSupportedError, "shaderInt16 feature is not supported");
	}

	if ((m_features == COMPUTE_TEST_USES_INT64 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt64)
	{
		TCU_THROW(NotSupportedError, "shaderInt64 feature is not supported");
	}

	{
		const InstanceInterface&			vki					= m_context.getInstanceInterface();
		const VkPhysicalDevice				physicalDevice		= m_context.getPhysicalDevice();

		// 16bit storage features
		{
			if (!is16BitStorageFeaturesSupported(m_context.getUsedApiVersion(), vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.ext16BitStorage))
				TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
		}

		// Variable pointers features
		{
			if (!isVariablePointersFeaturesSupported(m_context.getUsedApiVersion(), vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.extVariablePointers))
				TCU_THROW(NotSupportedError, "Requested variable pointers features not supported");
		}
	}

	// Defer device and resource creation until after the feature checks.
	const Unique<VkDevice>				vkDevice			(createDeviceWithExtensions(m_context, m_context.getUniversalQueueFamilyIndex(), m_context.getDeviceExtensions(), m_shaderSpec.extensions));
	const VkDevice&						device				= *vkDevice;
	const DeviceDriver					vkDeviceInterface	(m_context.getInstanceInterface(), device);
	const DeviceInterface&				vkdi				= vkDeviceInterface;
	const de::UniquePtr<vk::Allocator>	vkAllocator			(createAllocator(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), vkDeviceInterface, device));
	Allocator&							allocator			= *vkAllocator;
	const VkQueue						queue				(getDeviceQueue(vkDeviceInterface, device, m_context.getUniversalQueueFamilyIndex(), 0));

	vector<AllocationSp>				inputAllocs;
	vector<AllocationSp>				outputAllocs;
	vector<BufferHandleSp>				inputBuffers;
	vector<BufferHandleSp>				outputBuffers;
	vector<VkDescriptorBufferInfo>		descriptorInfos;
	vector<VkDescriptorType>			descriptorTypes;

	DE_ASSERT(!m_shaderSpec.outputs.empty());

	// Create a buffer object and allocate backing storage for each input/output buffer.

	for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
	{
		if (m_shaderSpec.inputTypes.count(inputNdx) != 0)
			descriptorTypes.push_back(m_shaderSpec.inputTypes.at(inputNdx));
		else
			descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		AllocationMp		alloc;
		const BufferSp&		input		= m_shaderSpec.inputs[inputNdx];
		vector<deUint8>		inputBytes;

		input->getBytes(inputBytes);

		const size_t		numBytes	= inputBytes.size();
		BufferHandleUp*		buffer		= new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc));

		setMemory(vkdi, device, &*alloc, numBytes, &inputBytes.front());
		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
		inputBuffers.push_back(BufferHandleSp(buffer));
		inputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
	}

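	// Output buffers are prefilled with 0xff below so that bytes the shader
	// leaves unwritten are unlikely to match the expected output by accident.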
	for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
	{
		descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		AllocationMp		alloc;
		const BufferSp&		output		= m_shaderSpec.outputs[outputNdx];
		vector<deUint8>		outputBytes;

		output->getBytes(outputBytes);

		const size_t		numBytes	= outputBytes.size();
		BufferHandleUp*		buffer		= new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc));

		fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff);
		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
		outputBuffers.push_back(BufferHandleSp(buffer));
		outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
	}

	// Create layouts and descriptor set.

	Unique<VkDescriptorSetLayout>		descriptorSetLayout	(createDescriptorSetLayout(vkdi, device, descriptorTypes));
	Unique<VkPipelineLayout>			pipelineLayout		(createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
	Unique<VkDescriptorPool>			descriptorPool		(createDescriptorPool(vkdi, device, descriptorTypes));
	Unique<VkDescriptorSet>				descriptorSet		(createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout, descriptorTypes, descriptorInfos));

	// Create compute shader and pipeline.

	const ProgramBinary&				binary				= m_context.getBinaryCollection().get("compute");
	if (m_shaderSpec.verifyBinary && !m_shaderSpec.verifyBinary(binary))
	{
		return tcu::TestStatus::fail("Binary verification of SPIR-V in the test failed");
	}
	Unique<VkShaderModule>				module				(createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));

	Unique<VkPipeline>					computePipeline		(createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));

	// Create command buffer and record commands.

	const Unique<VkCommandPool>			cmdPool				(createCommandPool(vkdi, device, m_context.getUniversalQueueFamilyIndex()));
	Unique<VkCommandBuffer>				cmdBuffer			(allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	const VkCommandBufferBeginInfo		cmdBufferBeginInfo	=
	{
		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,	// sType
		DE_NULL,										// pNext
		VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,	// flags
		(const VkCommandBufferInheritanceInfo*)DE_NULL,	// pInheritanceInfo
	};

	const tcu::IVec3&				numWorkGroups		= m_shaderSpec.numWorkGroups;

	VK_CHECK(vkdi.beginCommandBuffer(*cmdBuffer, &cmdBufferBeginInfo));
	vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
	vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
	if (m_shaderSpec.pushConstants != DE_NULL)
	{
		vector<deUint8>	pushConstantsBytes;
		m_shaderSpec.pushConstants->getBytes(pushConstantsBytes);

		const deUint32	size	= static_cast<deUint32>(pushConstantsBytes.size());
		const void*		data	= &pushConstantsBytes.front();

		vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0, /* size = */ size, data);
	}
	vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());
	VK_CHECK(vkdi.endCommandBuffer(*cmdBuffer));

	// Create fence and run.

	const Unique<VkFence>			cmdCompleteFence	(createFence(vkdi, device));
	const deUint64					infiniteTimeout		= ~(deUint64)0u;
	const VkSubmitInfo				submitInfo			=
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO,					// sType
		DE_NULL,										// pNext
		0u,												// waitSemaphoreCount
		(const VkSemaphore*)DE_NULL,					// pWaitSemaphores
		(const VkPipelineStageFlags*)DE_NULL,			// pWaitDstStageMask
		1u,												// commandBufferCount
		&cmdBuffer.get(),								// pCommandBuffers
		0u,												// signalSemaphoreCount
		(const VkSemaphore*)DE_NULL,					// pSignalSemaphores
	};

	VK_CHECK(vkdi.queueSubmit(queue, 1, &submitInfo, *cmdCompleteFence));
	VK_CHECK(vkdi.waitForFences(device, 1, &cmdCompleteFence.get(), 0u, infiniteTimeout)); // \note: timeout is failure

	// Invalidate output memory ranges before checking on host.
	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
	{
		invalidateMemory(vkdi, device, outputAllocs[outputNdx].get(), m_shaderSpec.outputs[outputNdx]->getByteSize());
	}

	// Check output.
	if (m_shaderSpec.verifyIO)
	{
		if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs, m_context.getTestContext().getLog()))
			return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
	}
	else
	{
		for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
		{
			const BufferSp&	expectedOutput = m_shaderSpec.outputs[outputNdx];
			vector<deUint8>	expectedBytes;

			expectedOutput->getBytes(expectedBytes);

			if (deMemCmp(&expectedBytes.front(), outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size()))
				return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
		}
	}

	return tcu::TestStatus::pass("Output matches expected");
}

} // SpirVAssembly
} // vkt