/*-------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Test Case Skeleton Based on Compute Shaders
 *//*--------------------------------------------------------------------*/

#include "vktSpvAsmComputeShaderCase.hpp"

#include "deSharedPtr.hpp"
#include "deSTLUtil.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPlatform.hpp"
#include "vkRefUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"

namespace
{

using namespace vk;
using std::vector;

typedef vkt::SpirVAssembly::AllocationMp			AllocationMp;
typedef vkt::SpirVAssembly::AllocationSp			AllocationSp;

typedef Unique<VkBuffer>							BufferHandleUp;
typedef de::SharedPtr<BufferHandleUp>				BufferHandleSp;

/*--------------------------------------------------------------------*//*!
 * \brief Create storage buffer, allocate and bind memory for the buffer
 *
 * The memory is created as host visible and passed back as a vk::Allocation
 * instance via outMemory.
 *//*--------------------------------------------------------------------*/
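// Usage sketch (illustrative comment only; vkdi, device and allocator are
// assumed to come from the caller's context):
//
//   AllocationMp   mem;
//   Move<VkBuffer> buf = createBufferAndBindMemory(vkdi, device,
//       VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allocator, 256u, &mem);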
Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, size_t numBytes, AllocationMp* outMemory)
{
	VkBufferUsageFlags			usageBit		= (VkBufferUsageFlags)0;

	switch (dtype)
	{
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:	usageBit = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break;
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:	usageBit = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; break;
		default:								DE_ASSERT(false);
	}

	const VkBufferCreateInfo bufferCreateInfo	=
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// sType
		DE_NULL,								// pNext
		0u,										// flags
		numBytes,								// size
		usageBit,								// usage
		VK_SHARING_MODE_EXCLUSIVE,				// sharingMode
		0u,										// queueFamilyIndexCount
		DE_NULL,								// pQueueFamilyIndices
	};

	Move<VkBuffer>				buffer			(createBuffer(vkdi, device, &bufferCreateInfo));
	const VkMemoryRequirements	requirements	= getBufferMemoryRequirements(vkdi, device, *buffer);
	AllocationMp				bufferMemory	= allocator.allocate(requirements, MemoryRequirement::HostVisible);

	VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset()));
	*outMemory = bufferMemory;

	return buffer;
}

void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemcpy(hostPtr, data, numBytes);
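	// The allocation is only guaranteed to be host visible, not host
	// coherent, so flush the written range to make it visible to the device.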
	flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
}

void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value)
{
	void* const hostPtr = destAlloc->getHostPtr();

	deMemset(hostPtr, value, numBytes);
	flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set layout with the given descriptor types
 *
 * All descriptors are created for the compute pipeline stage.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSetLayout> createDescriptorSetLayout (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorSetLayoutBuilder builder;

	for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx)
		builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT);

	return builder.build(vkdi, device);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a pipeline layout with one descriptor set
 *//*--------------------------------------------------------------------*/
Move<VkPipelineLayout> createPipelineLayout (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorSetLayout descriptorSetLayout, const vkt::SpirVAssembly::BufferSp& pushConstants)
{
	VkPipelineLayoutCreateInfo		createInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// sType
		DE_NULL,										// pNext
		(VkPipelineLayoutCreateFlags)0,					// flags
		1u,												// setLayoutCount
		&descriptorSetLayout,							// pSetLayouts
		0u,												// pushConstantRangeCount
		DE_NULL,										// pPushConstantRanges
	};

	VkPushConstantRange				range		=
	{
		VK_SHADER_STAGE_COMPUTE_BIT,					// stageFlags
		0,												// offset
		0,												// size
	};

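	// When push constants are present, expose them as a single range that
	// covers the whole blob and is visible to the compute stage only.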
	if (pushConstants != DE_NULL)
	{
		range.size							= static_cast<deUint32>(pushConstants->getNumBytes());
		createInfo.pushConstantRangeCount	= 1;
		createInfo.pPushConstantRanges		= &range;
	}

	return createPipelineLayout(vkdi, device, &createInfo);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a one-time descriptor pool for one descriptor set that
 * supports the given descriptor types.
 *//*--------------------------------------------------------------------*/
inline Move<VkDescriptorPool> createDescriptorPool (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes)
{
	DescriptorPoolBuilder builder;

	for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx)
		builder.addType(dtypes[typeNdx], 1);

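	// The free-descriptor-set bit allows the set allocated from this pool to
	// be freed individually when its Unique<> wrapper is destroyed.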
	return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a descriptor set
 *
 * The descriptor set's layout contains the given descriptor types,
 * bound to consecutive binding points starting from 0.
 *//*--------------------------------------------------------------------*/
Move<VkDescriptorSet> createDescriptorSet (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorPool pool, VkDescriptorSetLayout layout, const vector<VkDescriptorType>& dtypes, const vector<VkDescriptorBufferInfo>& descriptorInfos)
{
	DE_ASSERT(dtypes.size() == descriptorInfos.size());

	const VkDescriptorSetAllocateInfo	allocInfo	=
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		pool,
		1u,
		&layout
	};

	Move<VkDescriptorSet>				descriptorSet	= allocateDescriptorSet(vkdi, device, &allocInfo);
	DescriptorSetUpdateBuilder			builder;

	for (deUint32 descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx)
		builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorInfos[descriptorNdx]);
	builder.update(vkdi, device);

	return descriptorSet;
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a compute pipeline based on the given shader
 *//*--------------------------------------------------------------------*/
Move<VkPipeline> createComputePipeline (const DeviceInterface& vkdi, const VkDevice& device, VkPipelineLayout pipelineLayout, VkShaderModule shader, const char* entryPoint, const vector<deUint32>& specConstants)
{
	const deUint32							numSpecConstants				= (deUint32)specConstants.size();
	vector<VkSpecializationMapEntry>		entries;
	VkSpecializationInfo					specInfo;

	if (numSpecConstants != 0)
	{
		entries.resize(numSpecConstants);

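		// Pack the constants tightly: constant ID ndx occupies one 32-bit
		// word at byte offset 4 * ndx of the specialization data blob.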
		for (deUint32 ndx = 0; ndx < numSpecConstants; ++ndx)
		{
			entries[ndx].constantID	= ndx;
			entries[ndx].offset		= ndx * (deUint32)sizeof(deUint32);
			entries[ndx].size		= sizeof(deUint32);
		}

		specInfo.mapEntryCount		= numSpecConstants;
		specInfo.pMapEntries		= &entries[0];
		specInfo.dataSize			= numSpecConstants * sizeof(deUint32);
		specInfo.pData				= specConstants.data();
	}

	const VkPipelineShaderStageCreateInfo	pipelineShaderStageCreateInfo	=
	{
		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// sType
		DE_NULL,												// pNext
		(VkPipelineShaderStageCreateFlags)0,					// flags
		VK_SHADER_STAGE_COMPUTE_BIT,							// stage
		shader,													// module
		entryPoint,												// pName
		(numSpecConstants == 0) ? DE_NULL : &specInfo,			// pSpecializationInfo
	};
	const VkComputePipelineCreateInfo		pipelineCreateInfo				=
	{
		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,			// sType
		DE_NULL,												// pNext
		(VkPipelineCreateFlags)0,								// flags
		pipelineShaderStageCreateInfo,							// stage
		pipelineLayout,											// layout
		(VkPipeline)0,											// basePipelineHandle
		0u,														// basePipelineIndex
	};

	return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo);
}

/*--------------------------------------------------------------------*//*!
 * \brief Create a command pool
 *
 * The created command pool is designated for use on the queue family
 * identified by the given queueFamilyIndex.
 *//*--------------------------------------------------------------------*/
Move<VkCommandPool> createCommandPool (const DeviceInterface& vkdi, VkDevice device, deUint32 queueFamilyIndex)
{
	return createCommandPool(vkdi, device, 0u, queueFamilyIndex);
}

} // anonymous

namespace vkt
{
namespace SpirVAssembly
{

/*--------------------------------------------------------------------*//*!
 * \brief Test instance for compute pipeline
 *
 * The compute shader is specified in the form of SPIR-V assembly and may
 * access at most MAX_NUM_INPUT_BUFFERS input storage buffers and
 * MAX_NUM_OUTPUT_BUFFERS output storage buffers. The shader source and
 * input/output data are given in a ComputeShaderSpec object.
 *
 * This instance runs the given compute shader, feeding it the data from the
 * input buffers, and compares the data in the output buffers with the
 * expected values.
 *//*--------------------------------------------------------------------*/
class SpvAsmComputeShaderInstance : public TestInstance
{
public:
										SpvAsmComputeShaderInstance	(Context& ctx, const ComputeShaderSpec& spec, const ComputeTestFeatures features);
	tcu::TestStatus						iterate						(void);

private:
	const ComputeShaderSpec&			m_shaderSpec;
	const ComputeTestFeatures			m_features;
};

// ComputeShaderTestCase implementations

SpvAsmComputeShaderCase::SpvAsmComputeShaderCase (tcu::TestContext& testCtx, const char* name, const char* description, const ComputeShaderSpec& spec, const ComputeTestFeatures features)
	: TestCase		(testCtx, name, description)
	, m_shaderSpec	(spec)
	, m_features	(features)
{
}

void SpvAsmComputeShaderCase::initPrograms (SourceCollections& programCollection) const
{
	programCollection.spirvAsmSources.add("compute") << m_shaderSpec.assembly.c_str();
}

TestInstance* SpvAsmComputeShaderCase::createInstance (Context& ctx) const
{
	return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec, m_features);
}
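
// Illustrative registration sketch (comment only; COMPUTE_TEST_USES_NONE and
// the helper names below are assumptions about the surrounding framework):
//
//   ComputeShaderSpec spec;
//   spec.assembly      = "...";              // SPIR-V assembly text
//   spec.numWorkGroups = tcu::IVec3(8, 1, 1);
//   spec.inputs.push_back(inputBuffer);      // BufferSp instances
//   spec.outputs.push_back(expectedOutput);
//   group->addChild(new SpvAsmComputeShaderCase(
//       testCtx, "example", "example case", spec, COMPUTE_TEST_USES_NONE));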

// ComputeShaderTestInstance implementations

SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec, const ComputeTestFeatures features)
	: TestInstance		(ctx)
	, m_shaderSpec		(spec)
	, m_features		(features)
{
}

tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void)
{
	const VkPhysicalDeviceFeatures&		features			= m_context.getDeviceFeatures();

	if ((m_features == COMPUTE_TEST_USES_INT16 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt16)
	{
		TCU_THROW(NotSupportedError, "shaderInt16 feature is not supported");
	}

	if ((m_features == COMPUTE_TEST_USES_INT64 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt64)
	{
		TCU_THROW(NotSupportedError, "shaderInt64 feature is not supported");
	}

	{
		const InstanceInterface&			vki					= m_context.getInstanceInterface();
		const VkPhysicalDevice				physicalDevice		= m_context.getPhysicalDevice();

		// 16bit storage features
		{
			if (!is16BitStorageFeaturesSupported(vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.ext16BitStorage))
				TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported");
		}

		// VariablePointers features
		{
			if (!isVariablePointersFeaturesSupported(vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.extVariablePointers))
				TCU_THROW(NotSupportedError, "Requested Variable Pointer features not supported");
		}
	}

	// defer device and resource creation until after feature checks
	const Unique<VkDevice>				vkDevice			(createDeviceWithExtensions(m_context, m_context.getUniversalQueueFamilyIndex(), m_context.getDeviceExtensions(), m_shaderSpec.extensions));
	const VkDevice&						device				= *vkDevice;
	const DeviceDriver					vkDeviceInterface	(m_context.getInstanceInterface(), device);
	const DeviceInterface&				vkdi				= vkDeviceInterface;
	const de::UniquePtr<vk::Allocator>	vkAllocator			(createAllocator(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), vkDeviceInterface, device));
	Allocator&							allocator			= *vkAllocator;
	const VkQueue						queue				(getDeviceQueue(vkDeviceInterface, device, m_context.getUniversalQueueFamilyIndex(), 0));

	vector<AllocationSp>				inputAllocs;
	vector<AllocationSp>				outputAllocs;
	vector<BufferHandleSp>				inputBuffers;
	vector<BufferHandleSp>				outputBuffers;
	vector<VkDescriptorBufferInfo>		descriptorInfos;
	vector<VkDescriptorType>			descriptorTypes;

	DE_ASSERT(!m_shaderSpec.outputs.empty());

	// Create a buffer object and allocate/bind backing storage for every input/output buffer.

	for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
	{
		if (m_shaderSpec.inputTypes.count(inputNdx) != 0)
			descriptorTypes.push_back(m_shaderSpec.inputTypes.at(inputNdx));
		else
			descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		AllocationMp		alloc;
		const BufferSp&		input		= m_shaderSpec.inputs[inputNdx];
		const size_t		numBytes	= input->getNumBytes();
		BufferHandleUp*		buffer		= new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc));

		setMemory(vkdi, device, &*alloc, numBytes, input->data());
		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
		inputBuffers.push_back(BufferHandleSp(buffer));
		inputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
	}

	for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
	{
		descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		AllocationMp		alloc;
		const BufferSp&		output		= m_shaderSpec.outputs[outputNdx];
		const size_t		numBytes	= output->getNumBytes();
		BufferHandleUp*		buffer		= new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc));

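		// Pre-fill output buffers with 0xff so that stale or unwritten
		// contents are unlikely to match the expected data by accident.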
		fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff);
		descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes));
		outputBuffers.push_back(BufferHandleSp(buffer));
		outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release()));
	}

	// Create layouts and descriptor set.

	Unique<VkDescriptorSetLayout>		descriptorSetLayout	(createDescriptorSetLayout(vkdi, device, descriptorTypes));
	Unique<VkPipelineLayout>			pipelineLayout		(createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants));
	Unique<VkDescriptorPool>			descriptorPool		(createDescriptorPool(vkdi, device, descriptorTypes));
	Unique<VkDescriptorSet>				descriptorSet		(createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout, descriptorTypes, descriptorInfos));

	// Create compute shader and pipeline.

	const ProgramBinary&				binary				= m_context.getBinaryCollection().get("compute");
	Unique<VkShaderModule>				module				(createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u));

	Unique<VkPipeline>					computePipeline		(createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants));

	// Create command buffer and record commands.

	const Unique<VkCommandPool>			cmdPool				(createCommandPool(vkdi, device, m_context.getUniversalQueueFamilyIndex()));
	Unique<VkCommandBuffer>				cmdBuffer			(allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	const VkCommandBufferBeginInfo		cmdBufferBeginInfo	=
	{
		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,	// sType
		DE_NULL,										// pNext
		VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
		(const VkCommandBufferInheritanceInfo*)DE_NULL,
	};

	const tcu::IVec3&				numWorkGroups		= m_shaderSpec.numWorkGroups;

	VK_CHECK(vkdi.beginCommandBuffer(*cmdBuffer, &cmdBufferBeginInfo));
	vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
	vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
	if (m_shaderSpec.pushConstants != DE_NULL)
	{
		const deUint32	size	= static_cast<deUint32>(m_shaderSpec.pushConstants->getNumBytes());
		const void*		data	= m_shaderSpec.pushConstants->data();

		vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0, /* size = */ size, data);
	}
	vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z());
	VK_CHECK(vkdi.endCommandBuffer(*cmdBuffer));

	// Create fence and run.

	const Unique<VkFence>			cmdCompleteFence	(createFence(vkdi, device));
	const deUint64					infiniteTimeout		= ~(deUint64)0u;
	const VkSubmitInfo				submitInfo			=
	{
		VK_STRUCTURE_TYPE_SUBMIT_INFO,
		DE_NULL,
		0u,
		(const VkSemaphore*)DE_NULL,
		(const VkPipelineStageFlags*)DE_NULL,
		1u,
		&cmdBuffer.get(),
		0u,
		(const VkSemaphore*)DE_NULL,
	};

	VK_CHECK(vkdi.queueSubmit(queue, 1, &submitInfo, *cmdCompleteFence));
	VK_CHECK(vkdi.waitForFences(device, 1, &cmdCompleteFence.get(), 0u, infiniteTimeout)); // \note: timeout is failure

	// Check output.
	if (m_shaderSpec.verifyIO)
	{
		if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs, m_context.getTestContext().getLog()))
			return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
	}
	else
	{
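		// Default verification: byte-exact comparison of each output buffer
		// against the expected blob provided in the shader spec.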
		for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
		{
			const BufferSp& expectedOutput = m_shaderSpec.outputs[outputNdx];
			if (deMemCmp(expectedOutput->data(), outputAllocs[outputNdx]->getHostPtr(), expectedOutput->getNumBytes()))
				return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage);
		}
	}

	return tcu::TestStatus::pass("Outputs match expected values");
}

} // SpirVAssembly
} // vkt