es31fBasicComputeShaderTests.cpp revision 8852c82a1ffa4760985c17cc6875d5d521daf343
1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Basic Compute Shader Tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es31fBasicComputeShaderTests.hpp"
25#include "gluShaderProgram.hpp"
26#include "gluObjectWrapper.hpp"
27#include "gluRenderContext.hpp"
28#include "gluProgramInterfaceQuery.hpp"
29#include "gluContextInfo.hpp"
30#include "glwFunctions.hpp"
31#include "glwEnums.hpp"
32#include "tcuTestLog.hpp"
33#include "deRandom.hpp"
34#include "deStringUtil.hpp"
35#include "deMemory.h"
36
37namespace deqp
38{
39namespace gles31
40{
41namespace Functional
42{
43
44using std::string;
45using std::vector;
46using tcu::TestLog;
47using namespace glu;
48
49//! Utility for mapping buffers.
50class BufferMemMap
51{
52public:
53	BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access)
54		: m_gl		(gl)
55		, m_target	(target)
56		, m_ptr		(DE_NULL)
57	{
58		m_ptr = gl.mapBufferRange(target, offset, size, access);
59		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
60		TCU_CHECK(m_ptr);
61	}
62
63	~BufferMemMap (void)
64	{
65		m_gl.unmapBuffer(m_target);
66	}
67
68	void*	getPtr		(void) const { return m_ptr; }
69	void*	operator*	(void) const { return m_ptr; }
70
71private:
72							BufferMemMap			(const BufferMemMap& other);
73	BufferMemMap&			operator=				(const BufferMemMap& other);
74
75	const glw::Functions&	m_gl;
76	const deUint32			m_target;
77	void*					m_ptr;
78};
79
80namespace
81{
82
83class EmptyComputeShaderCase : public TestCase
84{
85public:
86	EmptyComputeShaderCase (Context& context)
87		: TestCase(context, "empty", "Empty shader")
88	{
89	}
90
91	IterateResult iterate (void)
92	{
93		const ShaderProgram program(m_context.getRenderContext(),
94			ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE,
95				"#version 310 es\n"
96				"layout (local_size_x = 1) in;\n"
97				"void main (void) {}\n"
98				));
99
100		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
101
102		m_testCtx.getLog() << program;
103		if (!program.isOk())
104			TCU_FAIL("Compile failed");
105
106		gl.useProgram(program.getProgram());
107		gl.dispatchCompute(1, 1, 1);
108		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
109
110		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
111		return STOP;
112	}
113};
114
115class UBOToSSBOInvertCase : public TestCase
116{
117public:
118	UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
119		: TestCase		(context, name, description)
120		, m_numValues	(numValues)
121		, m_localSize	(localSize)
122		, m_workSize	(workSize)
123	{
124		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
125	}
126
127	IterateResult iterate (void)
128	{
129		std::ostringstream src;
130		src << "#version 310 es\n"
131			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
132			<< "uniform Input {\n"
133			<< "    uint values[" << m_numValues << "];\n"
134			<< "} ub_in;\n"
135			<< "layout(binding = 1) buffer Output {\n"
136			<< "    uint values[" << m_numValues << "];\n"
137			<< "} sb_out;\n"
138			<< "void main (void) {\n"
139			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
140			<< "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
141			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
142			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
143			<< "\n"
144			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
145			<< "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
146			<< "}\n";
147
148		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
149		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
150		const Buffer				inputBuffer		(m_context.getRenderContext());
151		const Buffer				outputBuffer	(m_context.getRenderContext());
152		std::vector<deUint32>		inputValues		(m_numValues);
153
154		// Compute input values.
155		{
156			de::Random rnd(0x111223f);
157			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
158				inputValues[ndx] = rnd.getUint32();
159		}
160
161		m_testCtx.getLog() << program;
162		if (!program.isOk())
163			TCU_FAIL("Compile failed");
164
165		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
166
167		gl.useProgram(program.getProgram());
168
169		// Input buffer setup
170		{
171			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
172			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
173			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
174			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
175
176			gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
177			gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
178
179			{
180				const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
181
182				for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++)
183					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
184			}
185
186			gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
187			gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
188			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
189		}
190
191		// Output buffer setup
192		{
193			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
194			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
195
196			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
197			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
198			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
199			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
200		}
201
202		// Dispatch compute workload
203		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
204		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
205
206		// Read back and compare
207		{
208			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
209			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
210			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
211			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
212			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
213
214			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
215			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
216			{
217				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
218				const deUint32	ref		= ~inputValues[ndx];
219
220				if (res != ref)
221					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
222			}
223		}
224
225		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
226		return STOP;
227	}
228
229private:
230	const int			m_numValues;
231	const tcu::IVec3	m_localSize;
232	const tcu::IVec3	m_workSize;
233};
234
235class CopyInvertSSBOCase : public TestCase
236{
237public:
238	CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
239		: TestCase		(context, name, description)
240		, m_numValues	(numValues)
241		, m_localSize	(localSize)
242		, m_workSize	(workSize)
243	{
244		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
245	}
246
247	IterateResult iterate (void)
248	{
249		std::ostringstream src;
250		src << "#version 310 es\n"
251			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
252			<< "layout(binding = 0) buffer Input {\n"
253			<< "    uint values[" << m_numValues << "];\n"
254			<< "} sb_in;\n"
255			<< "layout (binding = 1) buffer Output {\n"
256			<< "    uint values[" << m_numValues << "];\n"
257			<< "} sb_out;\n"
258			<< "void main (void) {\n"
259			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
260			<< "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
261			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
262			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
263			<< "\n"
264			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
265			<< "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
266			<< "}\n";
267
268		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
269		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
270		const Buffer				inputBuffer		(m_context.getRenderContext());
271		const Buffer				outputBuffer	(m_context.getRenderContext());
272		std::vector<deUint32>		inputValues		(m_numValues);
273
274		// Compute input values.
275		{
276			de::Random rnd(0x124fef);
277			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
278				inputValues[ndx] = rnd.getUint32();
279		}
280
281		m_testCtx.getLog() << program;
282		if (!program.isOk())
283			TCU_FAIL("Compile failed");
284
285		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
286
287		gl.useProgram(program.getProgram());
288
289		// Input buffer setup
290		{
291			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
292			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
293			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
294			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
295
296			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
297			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
298
299			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
300
301			{
302				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
303
304				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
305					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
306			}
307
308			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
309			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
310		}
311
312		// Output buffer setup
313		{
314			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
315			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
316
317			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
318			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
319			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
320			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
321		}
322
323		// Dispatch compute workload
324		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
325		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
326
327		// Read back and compare
328		{
329			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
330			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
331			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
332			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
333			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
334
335			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
336			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
337			{
338				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
339				const deUint32	ref		= ~inputValues[ndx];
340
341				if (res != ref)
342					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
343			}
344		}
345
346		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
347		return STOP;
348	}
349
350private:
351	const int			m_numValues;
352	const tcu::IVec3	m_localSize;
353	const tcu::IVec3	m_workSize;
354};
355
356class InvertSSBOInPlaceCase : public TestCase
357{
358public:
359	InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
360		: TestCase		(context, name, description)
361		, m_numValues	(numValues)
362		, m_isSized		(isSized)
363		, m_localSize	(localSize)
364		, m_workSize	(workSize)
365	{
366		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
367	}
368
369	IterateResult iterate (void)
370	{
371		std::ostringstream src;
372		src << "#version 310 es\n"
373			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
374			<< "layout(binding = 0) buffer InOut {\n"
375			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
376			<< "} sb_inout;\n"
377			<< "void main (void) {\n"
378			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
379			<< "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
380			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
381			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
382			<< "\n"
383			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
384			<< "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
385			<< "}\n";
386
387		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
388		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
389
390		m_testCtx.getLog() << program;
391		if (!program.isOk())
392			TCU_FAIL("Compile failed");
393
394		const Buffer				outputBuffer	(m_context.getRenderContext());
395		const deUint32				valueIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
396		const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
397		const deUint32				blockSize		= valueInfo.arrayStride*(deUint32)m_numValues;
398		std::vector<deUint32>		inputValues		(m_numValues);
399
400		// Compute input values.
401		{
402			de::Random rnd(0x82ce7f);
403			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
404				inputValues[ndx] = rnd.getUint32();
405		}
406
407		TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
408
409		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
410
411		gl.useProgram(program.getProgram());
412
413		// Output buffer setup
414		{
415			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
416			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
417
418			{
419				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
420
421				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
422					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
423			}
424
425			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
426			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
427		}
428
429		// Dispatch compute workload
430		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
431		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
432
433		// Read back and compare
434		{
435			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
436
437			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
438			{
439				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
440				const deUint32	ref		= ~inputValues[ndx];
441
442				if (res != ref)
443					throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
444			}
445		}
446
447		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
448		return STOP;
449	}
450
451private:
452	const int			m_numValues;
453	const bool			m_isSized;
454	const tcu::IVec3	m_localSize;
455	const tcu::IVec3	m_workSize;
456};
457
458class WriteToMultipleSSBOCase : public TestCase
459{
460public:
461	WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
462		: TestCase		(context, name, description)
463		, m_numValues	(numValues)
464		, m_isSized		(isSized)
465		, m_localSize	(localSize)
466		, m_workSize	(workSize)
467	{
468		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
469	}
470
471	IterateResult iterate (void)
472	{
473		std::ostringstream src;
474		src << "#version 310 es\n"
475			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
476			<< "layout(binding = 0) buffer Out0 {\n"
477			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
478			<< "} sb_out0;\n"
479			<< "layout(binding = 1) buffer Out1 {\n"
480			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
481			<< "} sb_out1;\n"
482			<< "void main (void) {\n"
483			<< "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
484			<< "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
485			<< "\n"
486			<< "    {\n"
487			<< "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
488			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
489			<< "\n"
490			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
491			<< "            sb_out0.values[offset + ndx] = offset + ndx;\n"
492			<< "    }\n"
493			<< "    {\n"
494			<< "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
495			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
496			<< "\n"
497			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
498			<< "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
499			<< "    }\n"
500			<< "}\n";
501
502		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
503		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
504
505		m_testCtx.getLog() << program;
506		if (!program.isOk())
507			TCU_FAIL("Compile failed");
508
509		const Buffer				outputBuffer0	(m_context.getRenderContext());
510		const deUint32				value0Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
511		const InterfaceVariableInfo	value0Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
512		const deUint32				block0Size		= value0Info.arrayStride*(deUint32)m_numValues;
513
514		const Buffer				outputBuffer1	(m_context.getRenderContext());
515		const deUint32				value1Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
516		const InterfaceVariableInfo	value1Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
517		const deUint32				block1Size		= value1Info.arrayStride*(deUint32)m_numValues;
518
519		TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
520		TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
521
522		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
523
524		gl.useProgram(program.getProgram());
525
526		// Output buffer setup
527		{
528			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
529			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
530
531			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
532			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
533		}
534		{
535			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
536			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
537
538			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
539			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
540		}
541
542		// Dispatch compute workload
543		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
544		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
545
546		// Read back and compare
547		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
548		{
549			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
550
551			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
552			{
553				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx));
554				const deUint32	ref		= ndx;
555
556				if (res != ref)
557					throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
558			}
559		}
560		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
561		{
562			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
563
564			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
565			{
566				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx));
567				const deUint32	ref		= m_numValues - ndx;
568
569				if (res != ref)
570					throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
571			}
572		}
573		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
574		return STOP;
575	}
576
577private:
578	const int			m_numValues;
579	const bool			m_isSized;
580	const tcu::IVec3	m_localSize;
581	const tcu::IVec3	m_workSize;
582};
583
584class SSBOLocalBarrierCase : public TestCase
585{
586public:
587	SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
588		: TestCase		(context, name, description)
589		, m_localSize	(localSize)
590		, m_workSize	(workSize)
591	{
592	}
593
594	IterateResult iterate (void)
595	{
596		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
597		const Buffer				outputBuffer	(m_context.getRenderContext());
598		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
599		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
600		const int					numValues		= workGroupSize*workGroupCount;
601
602		std::ostringstream src;
603		src << "#version 310 es\n"
604			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
605			<< "layout(binding = 0) buffer Output {\n"
606			<< "    coherent uint values[" << numValues << "];\n"
607			<< "} sb_out;\n\n"
608			<< "shared uint offsets[" << workGroupSize << "];\n\n"
609			<< "void main (void) {\n"
610			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
611			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
612			<< "    uint globalOffs = localSize*globalNdx;\n"
613			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
614			<< "\n"
615			<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
616			<< "    memoryBarrierBuffer();\n"
617			<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
618			<< "    memoryBarrierBuffer();\n"
619			<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
620			<< "}\n";
621
622		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
623
624		m_testCtx.getLog() << program;
625		if (!program.isOk())
626			TCU_FAIL("Compile failed");
627
628		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
629
630		gl.useProgram(program.getProgram());
631
632		// Output buffer setup
633		{
634			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
635			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
636
637			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
638			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
639			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
640			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
641		}
642
643		// Dispatch compute workload
644		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
645		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
646
647		// Read back and compare
648		{
649			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
650			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
651			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
652			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
653			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
654
655			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
656			{
657				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
658				{
659					const int		globalOffs	= groupNdx*workGroupSize;
660					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
661					const int		offs0		= localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
662					const int		offs1		= localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
663					const deUint32	ref			= (deUint32)(globalOffs + offs0 + offs1);
664
665					if (res != ref)
666						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
667				}
668			}
669		}
670
671		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
672		return STOP;
673	}
674
675private:
676	const tcu::IVec3	m_localSize;
677	const tcu::IVec3	m_workSize;
678};
679
680class SSBOBarrierCase : public TestCase
681{
682public:
683	SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
684		: TestCase		(context, name, description)
685		, m_workSize	(workSize)
686	{
687	}
688
689	IterateResult iterate (void)
690	{
691		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
692			ComputeSource("#version 310 es\n"
693						  "layout (local_size_x = 1) in;\n"
694						  "uniform uint u_baseVal;\n"
695						  "layout(binding = 1) buffer Output {\n"
696						  "    uint values[];\n"
697						  "};\n"
698						  "void main (void) {\n"
699						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
700						  "    values[offset] = u_baseVal+offset;\n"
701						  "}\n"));
702		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
703			ComputeSource("#version 310 es\n"
704						  "layout (local_size_x = 1) in;\n"
705						  "uniform uint u_baseVal;\n"
706						  "layout(binding = 1) buffer Input {\n"
707						  "    uint values[];\n"
708						  "};\n"
709						  "layout(binding = 0) buffer Output {\n"
710						  "    coherent uint sum;\n"
711						  "};\n"
712						  "void main (void) {\n"
713						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
714						  "    uint value  = values[offset];\n"
715						  "    atomicAdd(sum, value);\n"
716						  "}\n"));
717
718		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
719		const Buffer				tempBuffer		(m_context.getRenderContext());
720		const Buffer				outputBuffer	(m_context.getRenderContext());
721		const deUint32				baseValue		= 127;
722
723		m_testCtx.getLog() << program0 << program1;
724		if (!program0.isOk() || !program1.isOk())
725			TCU_FAIL("Compile failed");
726
727		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
728
729		// Temp buffer setup
730		{
731			const deUint32				valueIndex		= gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
732			const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
733			const deUint32				bufferSize		= valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];
734
735			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
736			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
737			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
738			GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
739		}
740
741		// Output buffer setup
742		{
743			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
744			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
745
746			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
747			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
748
749			{
750				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
751				deMemset(bufMap.getPtr(), 0, blockSize);
752			}
753
754			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
755			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
756		}
757
758		// Dispatch compute workload
759		gl.useProgram(program0.getProgram());
760		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
761		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
762		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
763		gl.useProgram(program1.getProgram());
764		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
765		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
766
767		// Read back and compare
768		{
769			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
770			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
771			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
772			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
773			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
774
775			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
776			deUint32					ref			= 0;
777
778			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
779				ref += baseValue + (deUint32)ndx;
780
781			if (res != ref)
782			{
783				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
784				throw tcu::TestError("Comparison failed");
785			}
786		}
787
788		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
789		return STOP;
790	}
791
792private:
793	const tcu::IVec3	m_workSize;
794};
795
796class BasicSharedVarCase : public TestCase
797{
798public:
799	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
800		: TestCase		(context, name, description)
801		, m_localSize	(localSize)
802		, m_workSize	(workSize)
803	{
804	}
805
806	IterateResult iterate (void)
807	{
808		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
809		const Buffer				outputBuffer	(m_context.getRenderContext());
810		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
811		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
812		const int					numValues		= workGroupSize*workGroupCount;
813
814		std::ostringstream src;
815		src << "#version 310 es\n"
816			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
817			<< "layout(binding = 0) buffer Output {\n"
818			<< "    uint values[" << numValues << "];\n"
819			<< "} sb_out;\n\n"
820			<< "shared uint offsets[" << workGroupSize << "];\n\n"
821			<< "void main (void) {\n"
822			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
823			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
824			<< "    uint globalOffs = localSize*globalNdx;\n"
825			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
826			<< "\n"
827			<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
828			<< "    barrier();\n"
829			<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
830			<< "}\n";
831
832		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
833
834		m_testCtx.getLog() << program;
835		if (!program.isOk())
836			TCU_FAIL("Compile failed");
837
838		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
839
840		gl.useProgram(program.getProgram());
841
842		// Output buffer setup
843		{
844			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
845			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
846
847			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
848			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
849			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
850			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
851		}
852
853		// Dispatch compute workload
854		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
855		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
856
857		// Read back and compare
858		{
859			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
860			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
861			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
862			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
863			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
864
865			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
866			{
867				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
868				{
869					const int		globalOffs	= groupNdx*workGroupSize;
870					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
871					const deUint32	ref			= (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));
872
873					if (res != ref)
874						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
875				}
876			}
877		}
878
879		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
880		return STOP;
881	}
882
883private:
884	const tcu::IVec3	m_localSize;
885	const tcu::IVec3	m_workSize;
886};
887
888class SharedVarAtomicOpCase : public TestCase
889{
890public:
891	SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
892		: TestCase		(context, name, description)
893		, m_localSize	(localSize)
894		, m_workSize	(workSize)
895	{
896	}
897
898	IterateResult iterate (void)
899	{
900		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
901		const Buffer				outputBuffer	(m_context.getRenderContext());
902		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
903		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
904		const int					numValues		= workGroupSize*workGroupCount;
905
906		std::ostringstream src;
907		src << "#version 310 es\n"
908			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
909			<< "layout(binding = 0) buffer Output {\n"
910			<< "    uint values[" << numValues << "];\n"
911			<< "} sb_out;\n\n"
912			<< "shared uint count;\n\n"
913			<< "void main (void) {\n"
914			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
915			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
916			<< "    uint globalOffs = localSize*globalNdx;\n"
917			<< "\n"
918			<< "    count = 0u;\n"
919			<< "    barrier();\n"
920			<< "    uint oldVal = atomicAdd(count, 1u);\n"
921			<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
922			<< "}\n";
923
924		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
925
926		m_testCtx.getLog() << program;
927		if (!program.isOk())
928			TCU_FAIL("Compile failed");
929
930		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
931
932		gl.useProgram(program.getProgram());
933
934		// Output buffer setup
935		{
936			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
937			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
938
939			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
940			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
941			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
942			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
943		}
944
945		// Dispatch compute workload
946		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
947		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
948
949		// Read back and compare
950		{
951			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
952			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
953			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
954			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
955			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
956
957			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
958			{
959				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
960				{
961					const int		globalOffs	= groupNdx*workGroupSize;
962					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
963					const deUint32	ref			= (deUint32)(localOffs+1);
964
965					if (res != ref)
966						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
967				}
968			}
969		}
970
971		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
972		return STOP;
973	}
974
975private:
976	const tcu::IVec3	m_localSize;
977	const tcu::IVec3	m_workSize;
978};
979
980class CopyImageToSSBOCase : public TestCase
981{
982public:
983	CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
984		: TestCase		(context, name, description)
985		, m_localSize	(localSize)
986		, m_imageSize	(imageSize)
987	{
988		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
989		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
990	}
991
992	IterateResult iterate (void)
993	{
994
995		std::ostringstream src;
996		src << "#version 310 es\n"
997			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
998			<< "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
999			<< "layout(binding = 0) buffer Output {\n"
1000			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1001			<< "} sb_out;\n\n"
1002			<< "void main (void) {\n"
1003			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1004			<< "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
1005			<< "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
1006			<< "}\n";
1007
1008		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1009		const Buffer				outputBuffer	(m_context.getRenderContext());
1010		const Texture				inputTexture	(m_context.getRenderContext());
1011		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1012		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1013		de::Random					rnd				(0xab2c7);
1014		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1015
1016		m_testCtx.getLog() << program;
1017		if (!program.isOk())
1018			TCU_FAIL("Compile failed");
1019
1020		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1021
1022		gl.useProgram(program.getProgram());
1023
1024		// Input values
1025		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1026			*i = rnd.getUint32();
1027
1028		// Input image setup
1029		gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
1030		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1031		gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
1032		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1033		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1034		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1035
1036		// Bind to unit 1
1037		gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
1038		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1039
1040		// Output buffer setup
1041		{
1042			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1043			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1044
1045			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1046			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1047			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1048			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1049		}
1050
1051		// Dispatch compute workload
1052		gl.dispatchCompute(workSize[0], workSize[1], 1);
1053		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1054
1055		// Read back and compare
1056		{
1057			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1058			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1059			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1060			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1061			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1062
1063			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1064
1065			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
1066			{
1067				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
1068				const deUint32	ref		= inputValues[ndx];
1069
1070				if (res != ref)
1071					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
1072			}
1073		}
1074
1075		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1076		return STOP;
1077	}
1078
1079private:
1080	const tcu::IVec2	m_localSize;
1081	const tcu::IVec2	m_imageSize;
1082};
1083
1084class CopySSBOToImageCase : public TestCase
1085{
1086public:
1087	CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
1088		: TestCase		(context, name, description)
1089		, m_localSize	(localSize)
1090		, m_imageSize	(imageSize)
1091	{
1092		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
1093		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
1094	}
1095
1096	IterateResult iterate (void)
1097	{
1098
1099		std::ostringstream src;
1100		src << "#version 310 es\n"
1101			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
1102			<< "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
1103			<< "buffer Input {\n"
1104			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
1105			<< "} sb_in;\n\n"
1106			<< "void main (void) {\n"
1107			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1108			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1109			<< "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
1110			<< "}\n";
1111
1112		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1113		const Buffer				inputBuffer		(m_context.getRenderContext());
1114		const Texture				outputTexture	(m_context.getRenderContext());
1115		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1116		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
1117		de::Random					rnd				(0x77238ac2);
1118		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
1119
1120		m_testCtx.getLog() << program;
1121		if (!program.isOk())
1122			TCU_FAIL("Compile failed");
1123
1124		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
1125
1126		gl.useProgram(program.getProgram());
1127
1128		// Input values
1129		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1130			*i = rnd.getUint32();
1131
1132		// Input buffer setup
1133		{
1134			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1135			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1136			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1137			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1138
1139			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1140			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1141
1142			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1143
1144			{
1145				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1146
1147				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1148					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1149			}
1150
1151			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1152			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1153		}
1154
1155		// Output image setup
1156		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1157		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1158		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1159		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1160		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1161
1162		// Bind to unit 1
1163		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1164		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1165
1166		// Dispatch compute workload
1167		gl.dispatchCompute(workSize[0], workSize[1], 1);
1168		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1169
1170		// Read back and compare
1171		{
1172			Framebuffer			fbo			(m_context.getRenderContext());
1173			vector<deUint32>	pixels		(inputValues.size()*4);
1174
1175			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1176			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1177			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1178
1179			// \note In ES3 we have to use GL_RGBA_INTEGER
1180			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1181			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1182			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1183
1184			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1185			{
1186				const deUint32	res		= pixels[ndx*4];
1187				const deUint32	ref		= inputValues[ndx];
1188
1189				if (res != ref)
1190					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
1191			}
1192		}
1193
1194		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1195		return STOP;
1196	}
1197
1198private:
1199	const tcu::IVec2	m_localSize;
1200	const tcu::IVec2	m_imageSize;
1201};
1202
1203class ImageAtomicOpCase : public TestCase
1204{
1205public:
1206	ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
1207		: TestCase		(context, name, description)
1208		, m_localSize	(localSize)
1209		, m_imageSize	(imageSize)
1210	{
1211	}
1212
1213	void init (void)
1214	{
1215		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1216			throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
1217	}
1218
1219	IterateResult iterate (void)
1220	{
1221
1222		std::ostringstream src;
1223		src << "#version 310 es\n"
1224			<< "#extension GL_OES_shader_image_atomic : require\n"
1225			<< "layout (local_size_x = " << m_localSize << ") in;\n"
1226			<< "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
1227			<< "buffer Input {\n"
1228			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
1229			<< "} sb_in;\n\n"
1230			<< "void main (void) {\n"
1231			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
1232			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
1233			<< "\n"
1234			<< "    if (gl_LocalInvocationIndex == 0u)\n"
1235			<< "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
1236			<< "    barrier();\n"
1237			<< "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
1238			<< "}\n";
1239
1240		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1241		const Buffer				inputBuffer		(m_context.getRenderContext());
1242		const Texture				outputTexture	(m_context.getRenderContext());
1243		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
1244		de::Random					rnd				(0x77238ac2);
1245		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]*m_localSize);
1246
1247		m_testCtx.getLog() << program;
1248		if (!program.isOk())
1249			TCU_FAIL("Compile failed");
1250
1251		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
1252
1253		gl.useProgram(program.getProgram());
1254
1255		// Input values
1256		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
1257			*i = rnd.getUint32();
1258
1259		// Input buffer setup
1260		{
1261			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
1262			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
1263			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
1264			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1265
1266			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
1267			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
1268
1269			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
1270
1271			{
1272				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
1273
1274				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
1275					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
1276			}
1277
1278			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
1279			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
1280		}
1281
1282		// Output image setup
1283		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
1284		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
1285		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1286		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1287		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1288
1289		// Bind to unit 1
1290		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1291		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1292
1293		// Dispatch compute workload
1294		gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
1295		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1296
1297		// Read back and compare
1298		{
1299			Framebuffer			fbo			(m_context.getRenderContext());
1300			vector<deUint32>	pixels		(m_imageSize[0]*m_imageSize[1]*4);
1301
1302			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
1303			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
1304			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
1305
1306			// \note In ES3 we have to use GL_RGBA_INTEGER
1307			gl.readBuffer(GL_COLOR_ATTACHMENT0);
1308			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
1309			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
1310
1311			for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
1312			{
1313				const deUint32	res		= pixels[pixelNdx*4];
1314				deUint32		ref		= 0;
1315
1316				for (int offs = 0; offs < m_localSize; offs++)
1317					ref += inputValues[pixelNdx*m_localSize + offs];
1318
1319				if (res != ref)
1320					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
1321			}
1322		}
1323
1324		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1325		return STOP;
1326	}
1327
1328private:
1329	const int			m_localSize;
1330	const tcu::IVec2	m_imageSize;
1331};
1332
1333class ImageBarrierCase : public TestCase
1334{
1335public:
1336	ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
1337		: TestCase		(context, name, description)
1338		, m_workSize	(workSize)
1339	{
1340	}
1341
1342	IterateResult iterate (void)
1343	{
1344		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
1345			ComputeSource("#version 310 es\n"
1346						  "layout (local_size_x = 1) in;\n"
1347						  "uniform uint u_baseVal;\n"
1348						  "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
1349						  "void main (void) {\n"
1350						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1351						  "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
1352						  "}\n"));
1353		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
1354			ComputeSource("#version 310 es\n"
1355						  "layout (local_size_x = 1) in;\n"
1356						  "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
1357						  "layout(binding = 0) buffer Output {\n"
1358						  "    coherent uint sum;\n"
1359						  "};\n"
1360						  "void main (void) {\n"
1361						  "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
1362						  "    atomicAdd(sum, value);\n"
1363						  "}\n"));
1364
1365		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1366		const Texture				tempTexture		(m_context.getRenderContext());
1367		const Buffer				outputBuffer	(m_context.getRenderContext());
1368		const deUint32				baseValue		= 127;
1369
1370		m_testCtx.getLog() << program0 << program1;
1371		if (!program0.isOk() || !program1.isOk())
1372			TCU_FAIL("Compile failed");
1373
1374		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1375
1376		// Temp texture setup
1377		gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
1378		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
1379		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1380		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1381		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
1382
1383		// Bind to unit 2
1384		gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1385		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
1386
1387		// Output buffer setup
1388		{
1389			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1390			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1391
1392			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1393			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1394
1395			{
1396				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
1397				deMemset(bufMap.getPtr(), 0, blockSize);
1398			}
1399
1400			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1401			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1402		}
1403
1404		// Dispatch compute workload
1405		gl.useProgram(program0.getProgram());
1406		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
1407		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1408		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1409		gl.useProgram(program1.getProgram());
1410		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
1411		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
1412
1413		// Read back and compare
1414		{
1415			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1416			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1417			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
1418			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1419			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1420
1421			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
1422			deUint32					ref			= 0;
1423
1424			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
1425				ref += baseValue + (deUint32)ndx;
1426
1427			if (res != ref)
1428			{
1429				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
1430				throw tcu::TestError("Comparison failed");
1431			}
1432		}
1433
1434		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1435		return STOP;
1436	}
1437
1438private:
1439	const tcu::IVec2	m_workSize;
1440};
1441
1442class AtomicCounterCase : public TestCase
1443{
1444public:
1445	AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
1446		: TestCase		(context, name, description)
1447		, m_localSize	(localSize)
1448		, m_workSize	(workSize)
1449	{
1450	}
1451
1452	IterateResult iterate (void)
1453	{
1454		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
1455		const Buffer				outputBuffer	(m_context.getRenderContext());
1456		const Buffer				counterBuffer	(m_context.getRenderContext());
1457		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
1458		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
1459		const int					numValues		= workGroupSize*workGroupCount;
1460
1461		std::ostringstream src;
1462		src << "#version 310 es\n"
1463			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
1464			<< "layout(binding = 0) buffer Output {\n"
1465			<< "    uint values[" << numValues << "];\n"
1466			<< "} sb_out;\n\n"
1467			<< "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
1468			<< "void main (void) {\n"
1469			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
1470			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
1471			<< "    uint globalOffs = localSize*globalNdx;\n"
1472			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
1473			<< "\n"
1474			<< "    uint oldVal = atomicCounterIncrement(u_count);\n"
1475			<< "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
1476			<< "}\n";
1477
1478		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
1479
1480		m_testCtx.getLog() << program;
1481		if (!program.isOk())
1482			TCU_FAIL("Compile failed");
1483
1484		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
1485
1486		gl.useProgram(program.getProgram());
1487
1488		// Atomic counter buffer setup
1489		{
1490			const deUint32	uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1491			const deUint32	bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1492			const deUint32	bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1493
1494			gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
1495			gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
1496
1497			{
1498				const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
1499				deMemset(memMap.getPtr(), 0, (int)bufferSize);
1500			}
1501
1502			gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
1503			GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
1504		}
1505
1506		// Output buffer setup
1507		{
1508			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1509			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1510
1511			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
1512			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
1513			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
1514			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
1515		}
1516
1517		// Dispatch compute workload
1518		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
1519		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1520
1521		// Read back and compare atomic counter
1522		{
1523			const deUint32		uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
1524			const deUint32		uniformOffset	= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
1525			const deUint32		bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
1526			const deUint32		bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
1527			const BufferMemMap	bufMap			(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
1528
1529			const deUint32		resVal			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset));
1530
1531			if (resVal != (deUint32)numValues)
1532				throw tcu::TestError("Invalid atomic counter value");
1533		}
1534
1535		// Read back and compare SSBO
1536		{
1537			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
1538			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
1539			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
1540			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
1541			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
1542			deUint32					valSum		= 0;
1543			deUint32					refSum		= 0;
1544
1545			for (int valNdx = 0; valNdx < numValues; valNdx++)
1546			{
1547				const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx));
1548
1549				valSum += res;
1550				refSum += (deUint32)valNdx;
1551
1552				if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues))
1553					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
1554			}
1555
1556			if (valSum != refSum)
1557				throw tcu::TestError("Total sum of values in Output.values doesn't match");
1558		}
1559
1560		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1561		return STOP;
1562	}
1563
1564private:
1565	const tcu::IVec3	m_localSize;
1566	const tcu::IVec3	m_workSize;
1567};
1568
1569} // anonymous
1570
1571BasicComputeShaderTests::BasicComputeShaderTests (Context& context)
1572	: TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
1573{
1574}
1575
1576BasicComputeShaderTests::~BasicComputeShaderTests (void)
1577{
1578}
1579
1580void BasicComputeShaderTests::init (void)
1581{
1582	addChild(new EmptyComputeShaderCase(m_context));
1583
1584	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_invocation",			"Copy from UBO to SSBO, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1585	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_group",					"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(2,1,4),	tcu::IVec3(1,1,1)));
1586	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_invocations",			"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1587	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_groups",				"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1588
1589	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_single_invocation",				"Copy between SSBOs, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1590	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_invocations",			"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
1591	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_groups",				"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1592
1593	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_single_invocation",				"Read and write same SSBO",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1594	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_multiple_groups",					"Read and write same SSBO",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1595
1596	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_single_invocation",		"Read and write same SSBO",				256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1597	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_multiple_groups",			"Read and write same SSBO",				1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1598
1599	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",		"Write to multiple SSBOs",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1600	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups",		"Write to multiple SSBOs",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1601
1602	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",	"Write to multiple SSBOs",			256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1603	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",	"Write to multiple SSBOs",			1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
1604
1605	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_invocation",		"SSBO local barrier usage",				tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1606	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_group",			"SSBO local barrier usage",				tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1607	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_multiple_groups",		"SSBO local barrier usage",				tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1608
1609	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_single",					"SSBO memory barrier usage",			tcu::IVec3(1,1,1)));
1610	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_multiple",				"SSBO memory barrier usage",			tcu::IVec3(11,5,7)));
1611
1612	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_invocation",				"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1613	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_group",					"Basic shared variable usage",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1614	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_invocations",			"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1615	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_groups",				"Basic shared variable usage",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1616
1617	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_invocation",		"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1618	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_group",			"Atomic operation with shared var",		tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1619	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_invocations",	"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1620	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_groups",			"Atomic operation with shared var",		tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1621
1622	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_small",					"Image to SSBO copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1623	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_large",					"Image to SSBO copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1624
1625	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_small",					"SSBO to image copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
1626	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_large",					"SSBO to image copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
1627
1628	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_1",				"Atomic operation with image",			1,	tcu::IVec2(64,64)));
1629	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_8",				"Atomic operation with image",			8,	tcu::IVec2(64,64)));
1630
1631	addChild(new ImageBarrierCase		(m_context, "image_barrier_single",						"Image barrier",						tcu::IVec2(1,1)));
1632	addChild(new ImageBarrierCase		(m_context, "image_barrier_multiple",					"Image barrier",						tcu::IVec2(64,64)));
1633
1634	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_invocation",			"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
1635	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_group",				"Basic atomic counter test",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
1636	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_invocations",		"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
1637	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_groups",			"Basic atomic counter test",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
1638}
1639
1640} // Functional
1641} // gles31
1642} // deqp
1643