1/*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 * Copyright (c) 2016 The Android Open Source Project
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Indirect Compute Dispatch tests
23 *//*--------------------------------------------------------------------*/
24
25#include "vktComputeIndirectComputeDispatchTests.hpp"
26#include "vktComputeTestsUtil.hpp"
27
28#include <string>
29#include <map>
30#include <vector>
31
32#include "vkDefs.hpp"
33#include "vkRef.hpp"
34#include "vkRefUtil.hpp"
35#include "vktTestCase.hpp"
36#include "vktTestCaseUtil.hpp"
37#include "vkPlatform.hpp"
38#include "vkPrograms.hpp"
39#include "vkMemUtil.hpp"
40#include "vkBuilderUtil.hpp"
41#include "vkQueryUtil.hpp"
42
43#include "tcuVector.hpp"
44#include "tcuVectorUtil.hpp"
45#include "tcuTestLog.hpp"
46#include "tcuRGBA.hpp"
47#include "tcuStringTemplate.hpp"
48
49#include "deUniquePtr.hpp"
50#include "deSharedPtr.hpp"
51#include "deStringUtil.hpp"
52#include "deArrayUtil.hpp"
53
54#include "gluShaderUtil.hpp"
55
56namespace vkt
57{
58namespace compute
59{
60namespace
61{
62
63enum
64{
65	RESULT_BLOCK_BASE_SIZE			= 4 * (int)sizeof(deUint32), // uvec3 + uint
66	RESULT_BLOCK_NUM_PASSED_OFFSET	= 3 * (int)sizeof(deUint32),
67	INDIRECT_COMMAND_OFFSET			= 3 * (int)sizeof(deUint32),
68};
69
70vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface&	instance_interface,
71											const vk::VkPhysicalDevice		physicalDevice,
72											const vk::VkDeviceSize			baseSize)
73{
74	// TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
75	vk::VkPhysicalDeviceProperties deviceProperties;
76	instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
77	vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;
78
79	if (alignment == 0 || (baseSize % alignment == 0))
80		return baseSize;
81	else
82		return (baseSize / alignment + 1)*alignment;
83}
84
85struct DispatchCommand
86{
87				DispatchCommand (const deIntptr		offset,
88								 const tcu::UVec3&	numWorkGroups)
89					: m_offset			(offset)
90					, m_numWorkGroups	(numWorkGroups) {}
91
92	deIntptr	m_offset;
93	tcu::UVec3	m_numWorkGroups;
94};
95
96typedef std::vector<DispatchCommand> DispatchCommandsVec;
97
98struct DispatchCaseDesc
99{
100								DispatchCaseDesc (const char*					name,
101												  const char*					description,
102												  const deUintptr				bufferSize,
103												  const tcu::UVec3				workGroupSize,
104												  const DispatchCommandsVec&	dispatchCommands)
105									: m_name				(name)
106									, m_description			(description)
107									, m_bufferSize			(bufferSize)
108									, m_workGroupSize		(workGroupSize)
109									, m_dispatchCommands	(dispatchCommands) {}
110
111	const char*					m_name;
112	const char*					m_description;
113	const deUintptr				m_bufferSize;
114	const tcu::UVec3			m_workGroupSize;
115	const DispatchCommandsVec	m_dispatchCommands;
116};
117
118class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
119{
120public:
121									IndirectDispatchInstanceBufferUpload	(Context&					context,
122																			 const std::string&			name,
123																			 const deUintptr			bufferSize,
124																			 const tcu::UVec3&			workGroupSize,
125																			 const DispatchCommandsVec& dispatchCommands);
126
127	virtual							~IndirectDispatchInstanceBufferUpload	(void) {}
128
129	virtual tcu::TestStatus			iterate									(void);
130
131protected:
132	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer	commandBuffer,
133																			 const Buffer&				indirectBuffer);
134
135	deBool							verifyResultBuffer						(const Buffer&				resultBuffer,
136																			 const vk::VkDeviceSize		resultBlockSize,
137																			 const vk::VkDeviceSize		resultBufferSize) const;
138
139	Context&						m_context;
140	const std::string				m_name;
141
142	const vk::DeviceInterface&		m_device_interface;
143	const vk::VkDevice				m_device;
144
145	const vk::VkQueue				m_queue;
146	const deUint32					m_queueFamilyIndex;
147
148	const deUintptr					m_bufferSize;
149	const tcu::UVec3				m_workGroupSize;
150	const DispatchCommandsVec		m_dispatchCommands;
151
152	vk::Allocator&					m_allocator;
153
154private:
155	IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
156	IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
157};
158
159IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&					context,
160																			const std::string&			name,
161																			const deUintptr				bufferSize,
162																			const tcu::UVec3&			workGroupSize,
163																			const DispatchCommandsVec&	dispatchCommands)
164	: vkt::TestInstance		(context)
165	, m_context				(context)
166	, m_name				(name)
167	, m_device_interface	(context.getDeviceInterface())
168	, m_device				(context.getDevice())
169	, m_queue				(context.getUniversalQueue())
170	, m_queueFamilyIndex	(context.getUniversalQueueFamilyIndex())
171	, m_bufferSize			(bufferSize)
172	, m_workGroupSize		(workGroupSize)
173	, m_dispatchCommands	(dispatchCommands)
174	, m_allocator			(context.getDefaultAllocator())
175{
176}
177
178void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
179{
180	DE_UNREF(commandBuffer);
181
182	const vk::Allocation& alloc = indirectBuffer.getAllocation();
183	deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
184
185	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
186	{
187		DE_ASSERT(cmdIter->m_offset >= 0);
188		DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
189		DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);
190
191		deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];
192
193		dstPtr[0] = cmdIter->m_numWorkGroups[0];
194		dstPtr[1] = cmdIter->m_numWorkGroups[1];
195		dstPtr[2] = cmdIter->m_numWorkGroups[2];
196	}
197
198	vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize);
199}
200
201tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
202{
203	tcu::TestContext& testCtx = m_context.getTestContext();
204
205	testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage;
206	{
207		tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");
208
209		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
210		{
211			testCtx.getLog()
212				<< tcu::TestLog::Message
213				<< cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
214				<< tcu::TestLog::EndMessage;
215		}
216	}
217
218	// Create result buffer
219	const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
220	const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();
221
222	Buffer resultBuffer(
223		m_device_interface, m_device, m_allocator,
224		makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
225		vk::MemoryRequirement::HostVisible);
226
227	{
228		const vk::Allocation& alloc = resultBuffer.getAllocation();
229		deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
230
231		for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
232		{
233			deUint8* const	dstPtr = &resultDataPtr[resultBlockSize*cmdNdx];
234
235			*(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
236			*(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
237			*(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
238			*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
239		}
240
241		vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
242	}
243
244	// Create verify compute shader
245	const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule(
246		m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u));
247
248	// Create descriptorSetLayout
249	vk::DescriptorSetLayoutBuilder layoutBuilder;
250	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
251	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
252
253	// Create compute pipeline
254	const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout));
255	const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader));
256
257	// Create descriptor pool
258	const vk::Unique<vk::VkDescriptorPool> descriptorPool(
259		vk::DescriptorPoolBuilder()
260		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
261		.build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));
262
263	const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
264		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);
265
266	// Create command buffer
267	const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex));
268	const vk::Unique<vk::VkCommandBuffer> cmdBuffer(makeCommandBuffer(m_device_interface, m_device, *cmdPool));
269
270	// Begin recording commands
271	beginCommandBuffer(m_device_interface, *cmdBuffer);
272
273	// Create indirect buffer
274	Buffer indirectBuffer(
275		m_device_interface, m_device, m_allocator,
276		makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
277		vk::MemoryRequirement::HostVisible);
278	fillIndirectBufferData(*cmdBuffer, indirectBuffer);
279
280	// Bind compute pipeline
281	m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
282
283	// Allocate descriptor sets
284	typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
285	std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size());
286
287	vk::VkDeviceSize curOffset = 0;
288
289	// Create descriptor sets
290	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
291	{
292		descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
293									makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout)));
294
295		const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);
296
297		vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
298		descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
299		descriptorSetBuilder.update(m_device_interface, m_device);
300
301		// Bind descriptor set
302		m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);
303
304		// Dispatch indirect compute command
305		m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);
306
307		curOffset += resultBlockSize;
308	}
309
310	// Insert memory barrier
311	m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
312										  0, (const vk::VkMemoryBarrier*)DE_NULL,
313										  1, &ssboPostBarrier,
314										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
315
316	// End recording commands
317	endCommandBuffer(m_device_interface, *cmdBuffer);
318
319	// Wait for command buffer execution finish
320	submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer);
321
322	// Check if result buffer contains valid values
323	if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize))
324		return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
325	else
326		return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
327}
328
329deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer&			resultBuffer,
330																 const vk::VkDeviceSize	resultBlockSize,
331																 const vk::VkDeviceSize	resultBufferSize) const
332{
333	deBool allOk = true;
334	const vk::Allocation& alloc = resultBuffer.getAllocation();
335	vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize);
336
337	const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());
338
339	for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
340	{
341		const DispatchCommand&	cmd = m_dispatchCommands[cmdNdx];
342		const deUint8* const	srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize;
343		const deUint32			numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
344		const deUint32			numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
345		const deUint32			numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
346		const deUint32			expectedCount = numInvocationsPerGroup * numGroups;
347
348		if (numPassed != expectedCount)
349		{
350			tcu::TestContext& testCtx = m_context.getTestContext();
351
352			testCtx.getLog()
353				<< tcu::TestLog::Message
354				<< "ERROR: got invalid result for invocation " << cmdNdx
355				<< ": got numPassed = " << numPassed << ", expected " << expectedCount
356				<< tcu::TestLog::EndMessage;
357
358			allOk = false;
359		}
360	}
361
362	return allOk;
363}
364
365class IndirectDispatchCaseBufferUpload : public vkt::TestCase
366{
367public:
368								IndirectDispatchCaseBufferUpload	(tcu::TestContext&			testCtx,
369																	 const DispatchCaseDesc&	caseDesc,
370																	 const glu::GLSLVersion		glslVersion);
371
372	virtual						~IndirectDispatchCaseBufferUpload	(void) {}
373
374	virtual void				initPrograms						(vk::SourceCollections&		programCollection) const;
375	virtual TestInstance*		createInstance						(Context&					context) const;
376
377protected:
378	const deUintptr				m_bufferSize;
379	const tcu::UVec3			m_workGroupSize;
380	const DispatchCommandsVec	m_dispatchCommands;
381	const glu::GLSLVersion		m_glslVersion;
382
383private:
384	IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
385	IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
386};
387
388IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&		testCtx,
389																	const DispatchCaseDesc& caseDesc,
390																	const glu::GLSLVersion	glslVersion)
391	: vkt::TestCase			(testCtx, caseDesc.m_name, caseDesc.m_description)
392	, m_bufferSize			(caseDesc.m_bufferSize)
393	, m_workGroupSize		(caseDesc.m_workGroupSize)
394	, m_dispatchCommands	(caseDesc.m_dispatchCommands)
395	, m_glslVersion			(glslVersion)
396{
397}
398
399void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
400{
401	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
402
403	std::ostringstream	verifyBuffer;
404
405	verifyBuffer
406		<< versionDecl << "\n"
407		<< "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
408		<< "layout(set = 0, binding = 0, std430) buffer Result\n"
409		<< "{\n"
410		<< "    uvec3           expectedGroupCount;\n"
411		<< "    coherent uint   numPassed;\n"
412		<< "} result;\n"
413		<< "void main (void)\n"
414		<< "{\n"
415		<< "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
416		<< "        atomicAdd(result.numPassed, 1u);\n"
417		<< "}\n";
418
419	std::map<std::string, std::string> args;
420
421	args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
422	args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
423	args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());
424
425	std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);
426
427	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
428}
429
430TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
431{
432	return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
433}
434
435class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
436{
437public:
438									IndirectDispatchInstanceBufferGenerate	(Context&					context,
439																			 const std::string&			name,
440																			 const deUintptr			bufferSize,
441																			 const tcu::UVec3&			workGroupSize,
442																			 const DispatchCommandsVec&	dispatchCommands)
443										: IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {}
444
445	virtual							~IndirectDispatchInstanceBufferGenerate	(void) {}
446
447protected:
448	virtual void					fillIndirectBufferData					(const vk::VkCommandBuffer	commandBuffer,
449																			 const Buffer&				indirectBuffer);
450
451	vk::Move<vk::VkDescriptorPool>	m_descriptorPool;
452	vk::Move<vk::VkDescriptorSet>	m_descriptorSet;
453	vk::Move<vk::VkPipelineLayout>	m_pipelineLayout;
454	vk::Move<vk::VkPipeline>		m_computePipeline;
455
456private:
457	IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
458	IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
459};
460
461void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer)
462{
463	// Create compute shader that generates data for indirect buffer
464	const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule(
465		m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));
466
467	// Create descriptorSetLayout
468	vk::DescriptorSetLayoutBuilder layoutBuilder;
469	layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
470	vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device));
471
472	// Create compute pipeline
473	m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout);
474	m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);
475
476	// Create descriptor pool
477	m_descriptorPool = vk::DescriptorPoolBuilder()
478		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
479		.build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
480
481	// Create descriptor set
482	m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout);
483
484	const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);
485
486	vk::DescriptorSetUpdateBuilder	descriptorSetBuilder;
487	descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
488	descriptorSetBuilder.update(m_device_interface, m_device);
489
490	const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
491		vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);
492
493	// Bind compute pipeline
494	m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);
495
496	// Bind descriptor set
497	m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
498
499	// Dispatch compute command
500	m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u);
501
502	// Insert memory barrier
503	m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
504										  0, (const vk::VkMemoryBarrier*)DE_NULL,
505										  1, &bufferBarrier,
506										  0, (const vk::VkImageMemoryBarrier*)DE_NULL);
507}
508
509class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
510{
511public:
512							IndirectDispatchCaseBufferGenerate	(tcu::TestContext&			testCtx,
513																 const DispatchCaseDesc&	caseDesc,
514																 const glu::GLSLVersion		glslVersion)
515								: IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {}
516
517	virtual					~IndirectDispatchCaseBufferGenerate	(void) {}
518
519	virtual void			initPrograms						(vk::SourceCollections&		programCollection) const;
520	virtual TestInstance*	createInstance						(Context&					context) const;
521
522private:
523	IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
524	IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
525};
526
527void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
528{
529	IndirectDispatchCaseBufferUpload::initPrograms(programCollection);
530
531	const char* const	versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
532
533	std::ostringstream computeBuffer;
534
535	// Header
536	computeBuffer
537		<< versionDecl << "\n"
538		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
539		<< "layout(set = 0, binding = 0, std430) buffer Out\n"
540		<< "{\n"
541		<< "	highp uint data[];\n"
542		<< "};\n"
543		<< "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
544		<< "{\n"
545		<< "	data[offset+0u] = numWorkGroups.x;\n"
546		<< "	data[offset+1u] = numWorkGroups.y;\n"
547		<< "	data[offset+2u] = numWorkGroups.z;\n"
548		<< "}\n"
549		<< "void main (void)\n"
550		<< "{\n";
551
552	// Dispatch commands
553	for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
554	{
555		const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
556		DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);
557
558		computeBuffer
559			<< "\twriteCmd(" << offs << "u, uvec3("
560			<< cmdIter->m_numWorkGroups.x() << "u, "
561			<< cmdIter->m_numWorkGroups.y() << "u, "
562			<< cmdIter->m_numWorkGroups.z() << "u));\n";
563	}
564
565	// Ending
566	computeBuffer << "}\n";
567
568	std::string computeString = computeBuffer.str();
569
570	programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
571}
572
573TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
574{
575	return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands);
576}
577
578DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
579{
580	DispatchCommandsVec vec;
581	vec.push_back(cmd);
582	return vec;
583}
584
585DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
586								 const DispatchCommand& cmd1,
587								 const DispatchCommand& cmd2,
588								 const DispatchCommand& cmd3,
589								 const DispatchCommand& cmd4)
590{
591	DispatchCommandsVec vec;
592	vec.push_back(cmd0);
593	vec.push_back(cmd1);
594	vec.push_back(cmd2);
595	vec.push_back(cmd3);
596	vec.push_back(cmd4);
597	return vec;
598}
599
600DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
601								 const DispatchCommand& cmd1,
602								 const DispatchCommand& cmd2,
603								 const DispatchCommand& cmd3,
604								 const DispatchCommand& cmd4,
605								 const DispatchCommand& cmd5,
606								 const DispatchCommand& cmd6)
607{
608	DispatchCommandsVec vec;
609	vec.push_back(cmd0);
610	vec.push_back(cmd1);
611	vec.push_back(cmd2);
612	vec.push_back(cmd3);
613	vec.push_back(cmd4);
614	vec.push_back(cmd5);
615	vec.push_back(cmd6);
616	return vec;
617}
618
619} // anonymous ns
620
621tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx)
622{
623	static const DispatchCaseDesc s_dispatchCases[] =
624	{
625		DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
626			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)))
627        ),
628		DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
629			commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5)))
630		),
631		DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
632			commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3)))
633		),
634		DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
635			commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1)))
636		),
637		DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1),
638			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1)))
639		),
640		DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1),
641			commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3)))
642		),
643		DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
644			commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0)))
645		),
646		DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
647			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
648						DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
649						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
650						DispatchCommand(40, tcu::UVec3(1, 1, 7)),
651						DispatchCommand(52, tcu::UVec3(1, 1, 4)))
652		),
653		DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2),
654			commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
655						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
656						DispatchCommand(0, tcu::UVec3(1, 1, 1)),
657						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
658						DispatchCommand(104, tcu::UVec3(1, 3, 1)),
659						DispatchCommand(52, tcu::UVec3(1, 1, 4)),
660						DispatchCommand(52, tcu::UVec3(1, 1, 4)))
661		),
662	};
663
664	de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests"));
665
666	tcu::TestCaseGroup* const	groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", "");
667	indirectComputeDispatchTests->addChild(groupBufferUpload);
668
669	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
670	{
671		groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
672	}
673
674	tcu::TestCaseGroup* const	groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", "");
675	indirectComputeDispatchTests->addChild(groupBufferGenerate);
676
677	for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
678	{
679		groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES));
680	}
681
682	return indirectComputeDispatchTests.release();
683}
684
685} // compute
686} // vkt
687