1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Indirect compute dispatch tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es31fIndirectComputeDispatchTests.hpp"
25#include "gluObjectWrapper.hpp"
26#include "gluRenderContext.hpp"
27#include "gluShaderProgram.hpp"
28#include "glwFunctions.hpp"
29#include "glwEnums.hpp"
30#include "tcuVector.hpp"
31#include "tcuStringTemplate.hpp"
32#include "tcuTestLog.hpp"
33#include "deStringUtil.hpp"
34
35#include <vector>
36#include <string>
37#include <map>
38
39namespace deqp
40{
41namespace gles31
42{
43namespace Functional
44{
45
46using tcu::UVec3;
47using tcu::TestLog;
48using std::vector;
49using std::string;
50using std::map;
51
// \todo [2014-02-17 pyry] Should be extended with the following:
53
54// Negative:
55//  - no active shader program
56//  - indirect negative or not aligned
57//  - indirect + size outside buffer bounds
58//  - no buffer bound to DRAW_INDIRECT_BUFFER
//  - (implicit) buffer mapped
60
61// Robustness:
//  - lots of small work group launches
63//  - very large work group size
64//  - no synchronization, touched by gpu
//  - compute program overwriting buffer
66
67namespace
68{
69
70enum
71{
72	RESULT_BLOCK_BASE_SIZE				= (3+1)*(int)sizeof(deUint32),		// uvec3 + uint
73	RESULT_BLOCK_EXPECTED_COUNT_OFFSET	= 0,
74	RESULT_BLOCK_NUM_PASSED_OFFSET		= 3*(int)sizeof(deUint32),
75
76	INDIRECT_COMMAND_SIZE				= 3*(int)sizeof(deUint32)
77};
78
// Selects how the indirect command buffer contents are produced.
enum GenBuffer
{
	GEN_BUFFER_UPLOAD	= 0,	// Commands are written on the CPU and uploaded with glBufferData().
	GEN_BUFFER_COMPUTE	= 1,	// Commands are written by a compute shader.

	GEN_BUFFER_LAST		= 2
};
86
87glu::ProgramSources genVerifySources (const UVec3& workGroupSize)
88{
89	static const char* s_verifyDispatchTmpl =
90		"#version 310 es\n"
91		"layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
92		"layout(binding = 0, std430) buffer Result\n"
93		"{\n"
94		"    uvec3           expectedGroupCount;\n"
95		"    coherent uint   numPassed;\n"
96		"} result;\n"
97		"void main (void)\n"
98		"{\n"
99		"    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
100		"        atomicAdd(result.numPassed, 1u);\n"
101		"}\n";
102
103	map<string, string> args;
104
105	args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x());
106	args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y());
107	args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z());
108
109	return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args));
110}
111
112class IndirectDispatchCase : public TestCase
113{
114public:
115							IndirectDispatchCase	(Context& context, const char* name, const char* description, GenBuffer genBuffer);
116							~IndirectDispatchCase	(void);
117
118	IterateResult			iterate					(void);
119
120protected:
121	struct DispatchCommand
122	{
123		deIntptr	offset;
124		UVec3		numWorkGroups;
125
126		DispatchCommand (void) : offset(0) {}
127		DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {}
128	};
129
130	GenBuffer				m_genBuffer;
131	deUintptr				m_bufferSize;
132	UVec3					m_workGroupSize;
133	vector<DispatchCommand>	m_commands;
134
135	void					createCommandBuffer		(deUint32 buffer) const;
136	void					createResultBuffer		(deUint32 buffer) const;
137
138	bool					verifyResultBuffer		(deUint32 buffer);
139
140	void					createCmdBufferUpload	(deUint32 buffer) const;
141	void					createCmdBufferCompute	(deUint32 buffer) const;
142
143private:
144							IndirectDispatchCase	(const IndirectDispatchCase&);
145	IndirectDispatchCase&	operator=				(const IndirectDispatchCase&);
146};
147
148IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer)
149	: TestCase		(context, name, description)
150	, m_genBuffer	(genBuffer)
151	, m_bufferSize	(0)
152{
153}
154
155IndirectDispatchCase::~IndirectDispatchCase (void)
156{
157}
158
159static int getResultBlockAlignedSize (const glw::Functions& gl)
160{
161	const int	baseSize	= RESULT_BLOCK_BASE_SIZE;
162	int			alignment	= 0;
163	gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment);
164
165	if (alignment == 0 || (baseSize % alignment == 0))
166		return baseSize;
167	else
168		return (baseSize/alignment + 1)*alignment;
169}
170
171void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const
172{
173	switch (m_genBuffer)
174	{
175		case GEN_BUFFER_UPLOAD:		createCmdBufferUpload	(buffer);		break;
176		case GEN_BUFFER_COMPUTE:	createCmdBufferCompute	(buffer);		break;
177		default:
178			DE_ASSERT(false);
179	}
180}
181
182void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const
183{
184	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
185	vector<deUint8>			data	(m_bufferSize);
186
187	for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
188	{
189		DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3);
190		DE_ASSERT(cmdIter->offset >= 0);
191		DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0);
192		DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize);
193
194		deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset];
195
196		dstPtr[0] = cmdIter->numWorkGroups[0];
197		dstPtr[1] = cmdIter->numWorkGroups[1];
198		dstPtr[2] = cmdIter->numWorkGroups[2];
199	}
200
201	gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
202	gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW);
203}
204
205void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const
206{
207	std::ostringstream src;
208
209	// Header
210	src <<
211		"#version 310 es\n"
212		"layout(local_size_x = 1) in;\n"
213		"layout(std430, binding = 1) buffer Out\n"
214		"{\n"
215		"	highp uint data[];\n"
216		"};\n"
217		"void writeCmd (uint offset, uvec3 numWorkGroups)\n"
218		"{\n"
219		"	data[offset+0u] = numWorkGroups.x;\n"
220		"	data[offset+1u] = numWorkGroups.y;\n"
221		"	data[offset+2u] = numWorkGroups.z;\n"
222		"}\n"
223		"void main (void)\n"
224		"{\n";
225
226	// Commands
227	for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
228	{
229		const deUint32 offs = (deUint32)(cmdIter->offset/4);
230		DE_ASSERT((deIntptr)offs*4 == cmdIter->offset);
231
232		src << "\twriteCmd(" << offs << "u, uvec3("
233			<< cmdIter->numWorkGroups.x() << "u, "
234			<< cmdIter->numWorkGroups.y() << "u, "
235			<< cmdIter->numWorkGroups.z() << "u));\n";
236	}
237
238	src << "}\n";
239
240	{
241		const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
242		glu::ShaderProgram		program		(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str()));
243
244		m_testCtx.getLog() << program;
245		if (!program.isOk())
246			TCU_FAIL("Compile failed");
247
248		gl.useProgram(program.getProgram());
249
250		gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
251		gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW);
252		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer);
253		GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
254
255		gl.dispatchCompute(1,1,1);
256		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed");
257
258		gl.memoryBarrier(GL_COMMAND_BARRIER_BIT);
259		GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed");
260	}
261}
262
263void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const
264{
265	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
266	const int				resultBlockSize		= getResultBlockAlignedSize(gl);
267	const int				resultBufferSize	= resultBlockSize*(int)m_commands.size();
268	vector<deUint8>			data				(resultBufferSize);
269
270	for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
271	{
272		deUint8* const	dstPtr	= &data[resultBlockSize*cmdNdx];
273
274		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4)	= m_commands[cmdNdx].numWorkGroups[0];
275		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4)	= m_commands[cmdNdx].numWorkGroups[1];
276		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4)	= m_commands[cmdNdx].numWorkGroups[2];
277		*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET)			= 0;
278	}
279
280	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
281	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ);
282}
283
284deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups)
285{
286	const int	numInvocationsPerGroup	= workGroupSize[0]*workGroupSize[1]*workGroupSize[2];
287	const int	numGroups				= numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2];
288
289	return numInvocationsPerGroup*numGroups;
290}
291
292bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer)
293{
294	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
295
296	const int				resultBlockSize		= getResultBlockAlignedSize(gl);
297	const int				resultBufferSize	= resultBlockSize*(int)m_commands.size();
298
299	void*					mapPtr				= DE_NULL;
300	bool					allOk				= true;
301
302	try
303	{
304		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
305		mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT);
306
307		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed");
308		TCU_CHECK(mapPtr);
309
310		for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
311		{
312			const DispatchCommand&	cmd				= m_commands[cmdNdx];
313			const deUint8* const	srcPtr			= (const deUint8*)mapPtr + cmdNdx*resultBlockSize;
314			const deUint32			numPassed		= *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
315			const deUint32			expectedCount	= computeInvocationCount(m_workGroupSize, cmd.numWorkGroups);
316
317			// Verify numPassed.
318			if (numPassed != expectedCount)
319			{
320				m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
321													   << ": got numPassed = " << numPassed << ", expected " << expectedCount
322								   << TestLog::EndMessage;
323				allOk = false;
324			}
325		}
326	}
327	catch (...)
328	{
329		if (mapPtr)
330			gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
331	}
332
333	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
334	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed");
335
336	return allOk;
337}
338
339IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void)
340{
341	const glu::RenderContext&		renderCtx			= m_context.getRenderContext();
342	const glw::Functions&			gl					= renderCtx.getFunctions();
343
344	const glu::ShaderProgram		program				(renderCtx, genVerifySources(m_workGroupSize));
345
346	glu::Buffer						cmdBuffer			(renderCtx);
347	glu::Buffer						resultBuffer		(renderCtx);
348
349	m_testCtx.getLog() << program;
350	TCU_CHECK_MSG(program.isOk(), "Compile failed");
351
352	m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage;
353	{
354		tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)");
355
356		for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
357			m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset
358												   << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups
359							   << TestLog::EndMessage;
360	}
361
362	createResultBuffer(*resultBuffer);
363	createCommandBuffer(*cmdBuffer);
364
365	gl.useProgram(program.getProgram());
366	gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer);
367	GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed");
368
369	{
370		const int	resultBlockAlignedSize		= getResultBlockAlignedSize(gl);
371		deIntptr	curOffset					= 0;
372
373		for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
374		{
375			gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize);
376			gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset);
377
378			curOffset += resultBlockAlignedSize;
379		}
380	}
381
382	GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed");
383
384	if (verifyResultBuffer(*resultBuffer))
385		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
386	else
387		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
388
389	return STOP;
390}
391
392class SingleDispatchCase : public IndirectDispatchCase
393{
394public:
395	SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups)
396		: IndirectDispatchCase(context, name, description, genBuffer)
397	{
398		m_bufferSize	= bufferSize;
399		m_workGroupSize	= workGroupSize;
400		m_commands.push_back(DispatchCommand(offset, numWorkGroups));
401	}
402};
403
404class MultiDispatchCase : public IndirectDispatchCase
405{
406public:
407	MultiDispatchCase (Context& context, GenBuffer genBuffer)
408		: IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer)
409	{
410		m_bufferSize	= 1<<10;
411		m_workGroupSize	= UVec3(3,1,2);
412
413		m_commands.push_back(DispatchCommand(0,						UVec3(1,1,1)));
414		m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE,	UVec3(2,1,1)));
415		m_commands.push_back(DispatchCommand(104,					UVec3(1,3,1)));
416		m_commands.push_back(DispatchCommand(40,					UVec3(1,1,7)));
417		m_commands.push_back(DispatchCommand(52,					UVec3(1,1,4)));
418	}
419};
420
421class MultiDispatchReuseCommandCase : public IndirectDispatchCase
422{
423public:
424	MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer)
425		: IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer)
426	{
427		m_bufferSize	= 1<<10;
428		m_workGroupSize	= UVec3(3,1,2);
429
430		m_commands.push_back(DispatchCommand(0,						UVec3(1,1,1)));
431		m_commands.push_back(DispatchCommand(0,						UVec3(1,1,1)));
432		m_commands.push_back(DispatchCommand(0,						UVec3(1,1,1)));
433		m_commands.push_back(DispatchCommand(104,					UVec3(1,3,1)));
434		m_commands.push_back(DispatchCommand(104,					UVec3(1,3,1)));
435		m_commands.push_back(DispatchCommand(52,					UVec3(1,1,4)));
436		m_commands.push_back(DispatchCommand(52,					UVec3(1,1,4)));
437	}
438};
439
440} // anonymous
441
442IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context)
443	: TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests")
444{
445}
446
447IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void)
448{
449}
450
451void IndirectComputeDispatchTests::init (void)
452{
453	static const struct
454	{
455		const char*		name;
456		GenBuffer		gen;
457	} s_genBuffer[] =
458	{
459		{ "upload_buffer",		GEN_BUFFER_UPLOAD	},
460		{ "gen_in_compute",		GEN_BUFFER_COMPUTE	}
461	};
462
463	static const struct
464	{
465		const char*	name;
466		const char*	description;
467		deUintptr	bufferSize;
468		deUintptr	offset;
469		UVec3		workGroupSize;
470		UVec3		numWorkGroups;
471	} s_singleDispatchCases[] =
472	{
473	//	Name										Desc											BufferSize					Offs			WorkGroupSize	NumWorkGroups
474		{ "single_invocation",						"Single invocation only from offset 0",			INDIRECT_COMMAND_SIZE,		0,				UVec3(1,1,1),	UVec3(1,1,1) },
475		{ "multiple_groups",						"Multiple groups dispatched from offset 0",		INDIRECT_COMMAND_SIZE,		0,				UVec3(1,1,1),	UVec3(2,3,5) },
476		{ "multiple_groups_multiple_invocations",	"Multiple groups of size 2x3x1 from offset 0",	INDIRECT_COMMAND_SIZE,		0,				UVec3(2,3,1),	UVec3(1,2,3) },
477		{ "small_offset",							"Small offset",									16+INDIRECT_COMMAND_SIZE,	16,				UVec3(1,1,1),	UVec3(1,1,1) },
478		{ "large_offset",							"Large offset",									(2<<20),					(1<<20) + 12,	UVec3(1,1,1),	UVec3(1,1,1) },
479		{ "large_offset_multiple_invocations",		"Large offset, multiple invocations",			(2<<20),					(1<<20) + 12,	UVec3(2,3,1),	UVec3(1,2,3) },
480		{ "empty_command",							"Empty command",								INDIRECT_COMMAND_SIZE,		0,				UVec3(1,1,1),	UVec3(0,0,0) },
481	};
482
483	for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++)
484	{
485		const GenBuffer				genBuf		= s_genBuffer[genNdx].gen;
486		tcu::TestCaseGroup* const	genGroup	= new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, "");
487		addChild(genGroup);
488
489		for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++)
490			genGroup->addChild(new SingleDispatchCase(m_context,
491													  s_singleDispatchCases[ndx].name,
492													  s_singleDispatchCases[ndx].description,
493													  genBuf,
494													  s_singleDispatchCases[ndx].bufferSize,
495													  s_singleDispatchCases[ndx].offset,
496													  s_singleDispatchCases[ndx].workGroupSize,
497													  s_singleDispatchCases[ndx].numWorkGroups));
498
499		genGroup->addChild(new MultiDispatchCase				(m_context, genBuf));
500		genGroup->addChild(new MultiDispatchReuseCommandCase	(m_context, genBuf));
501	}
502}
503
504} // Functional
505} // gles31
506} // deqp
507