1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Synchronization Tests
22 *//*--------------------------------------------------------------------*/
23
24#include "es31fSynchronizationTests.hpp"
25#include "tcuTestLog.hpp"
26#include "tcuSurface.hpp"
27#include "tcuRenderTarget.hpp"
28#include "gluRenderContext.hpp"
29#include "gluShaderProgram.hpp"
30#include "gluObjectWrapper.hpp"
31#include "gluPixelTransfer.hpp"
32#include "gluContextInfo.hpp"
33#include "glwFunctions.hpp"
34#include "glwEnums.hpp"
35#include "deStringUtil.hpp"
36#include "deSharedPtr.hpp"
37#include "deMemory.h"
38#include "deRandom.hpp"
39
40#include <map>
41
42namespace deqp
43{
44namespace gles31
45{
46namespace Functional
47{
48namespace
49{
50
51
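//! Checks that a sorted chain of values observed from atomic additions could have been
//! produced by adding 1, 2, ..., N in some order: the deltas between consecutive values
//! (and between the last value and sumValue), once sorted, must form the ramp 1..N.
//! For example, valueChain = { 0, 1, 3 } with sumValue = 6 gives deltas { 1, 2, 3 },
//! which is a valid ramp.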
52static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
53{
54	std::vector<deUint32> chainDelta(valueChain.size());
55
56	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
57		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
58
59	// chainDelta now contains the actual additions applied to the value
60	// check that there exists an addition ramp from 1 to the number of operations
61	std::sort(chainDelta.begin(), chainDelta.end());
62
63	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
64	{
65		if ((int)chainDelta[callNdx] != callNdx+1)
66		{
67			invalidOperationNdx = callNdx;
68			errorDelta = chainDelta[callNdx];
69			errorExpected = callNdx+1;
70
71			return false;
72		}
73	}
74
75	return true;
76}
77
78static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
79{
80	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
81	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
82
83	if (!ptr)
84		throw tcu::TestError("mapBufferRange returned NULL");
85
86	result.resize(numElements);
87	deMemcpy(&result[0], ptr, sizeof(deUint32) * numElements);
88
89	if (gl.unmapBuffer(target) == GL_FALSE)
90		throw tcu::TestError("unmapBuffer returned false");
91}
92
93static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
94{
95	std::vector<deUint32> vec;
96
97	readBuffer(gl, target, 1, vec);
98
99	return vec[0];
100}
101
102//! Generate a ramp of values from 1 to numElements, and shuffle it
103void generateShuffledRamp (int numElements, std::vector<int>& ramp)
104{
105	de::Random rng(0xabcd);
106
107	// some positive (non-zero) unique values
108	ramp.resize(numElements);
109	for (int callNdx = 0; callNdx < numElements; ++callNdx)
110		ramp[callNdx] = callNdx + 1;
111
112	rng.shuffle(ramp.begin(), ramp.end());
113}
114
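//! Tests data synchronization between invocations of a single compute dispatch. Invocations
//! write to buffer or image storage, synchronize (see genBarrierSource()), and read data back.
//! FLAG_IN_GROUP makes an invocation read another invocation's data within the same work group,
//! FLAG_ALIASING_STORAGES binds the same storage object to two binding points, and FLAG_ATOMIC
//! performs the accesses with atomic operations.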
115class InterInvocationTestCase : public TestCase
116{
117public:
118	enum StorageType
119	{
120		STORAGE_BUFFER = 0,
121		STORAGE_IMAGE,
122
123		STORAGE_LAST
124	};
125	enum CaseFlags
126	{
127		FLAG_ATOMIC				= 0x1,
128		FLAG_ALIASING_STORAGES	= 0x2,
129		FLAG_IN_GROUP			= 0x4,
130	};
131
132						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
133						~InterInvocationTestCase	(void);
134
135private:
136	void				init						(void);
137	void				deinit						(void);
138	IterateResult		iterate						(void);
139
140	void				runCompute					(void);
141	bool				verifyResults				(void);
142	virtual std::string	genShaderSource				(void) const = 0;
143
144protected:
145	std::string			genBarrierSource			(void) const;
146
147	const StorageType	m_storage;
148	const bool			m_useAtomic;
149	const bool			m_aliasingStorages;
150	const bool			m_syncWithGroup;
151	const int			m_workWidth;				//!< total work width
152	const int			m_workHeight;				//!< total work height
153	const int			m_localWidth;				//!< work group width
154	const int			m_localHeight;				//!< work group height
155	const int			m_elementsPerInvocation;	//!< elements accessed by a single invocation
156
157private:
158	glw::GLuint			m_storageBuf;
159	glw::GLuint			m_storageTex;
160	glw::GLuint			m_resultBuf;
161	glu::ShaderProgram*	m_program;
162};
163
164InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
165	: TestCase					(context, name, desc)
166	, m_storage					(storage)
167	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
168	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
169	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
170	, m_workWidth				(256)
171	, m_workHeight				(256)
172	, m_localWidth				(16)
173	, m_localHeight				(8)
174	, m_elementsPerInvocation	(8)
175	, m_storageBuf				(0)
176	, m_storageTex				(0)
177	, m_resultBuf				(0)
178	, m_program					(DE_NULL)
179{
180	DE_ASSERT(m_storage < STORAGE_LAST);
181	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
182}
183
184InterInvocationTestCase::~InterInvocationTestCase (void)
185{
186	deinit();
187}
188
189void InterInvocationTestCase::init (void)
190{
191	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
192
193	// requirements
194
195	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
196		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
197
198	// program
199
200	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
201	m_testCtx.getLog() << *m_program;
202	if (!m_program->isOk())
203		throw tcu::TestError("could not build program");
204
205	// source
206
207	if (m_storage == STORAGE_BUFFER)
208	{
209		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
210		const int				bufferSize		= bufferElements * sizeof(deUint32);
211		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);
212
213		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
214
215		gl.genBuffers(1, &m_storageBuf);
216		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
217		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
218		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
219	}
220	else if (m_storage == STORAGE_IMAGE)
221	{
222		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
223		const int				bufferSize		= bufferElements * sizeof(deUint32);
224
225		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
226
227		gl.genTextures(1, &m_storageTex);
228		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
229		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
230		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
231		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
232		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
233
234		// Zero-fill
235		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
236
237		{
238			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
239			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
240			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
241		}
242	}
243	else
244		DE_ASSERT(DE_FALSE);
245
246	// destination
247
248	{
249		const int				bufferElements	= m_workWidth * m_workHeight;
250		const int				bufferSize		= bufferElements * sizeof(deUint32);
251		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);
252
253		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
254
255		gl.genBuffers(1, &m_resultBuf);
256		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
257		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
258		GLU_EXPECT_NO_ERROR(gl.getError(), "gen result buf");
259	}
260}
261
262void InterInvocationTestCase::deinit (void)
263{
264	if (m_storageBuf)
265	{
266		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
267		m_storageBuf = 0;
268	}
269
270	if (m_storageTex)
271	{
272		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
273		m_storageTex = 0;
274	}
275
276	if (m_resultBuf)
277	{
278		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
279		m_resultBuf = 0;
280	}
281
282	delete m_program;
283	m_program = DE_NULL;
284}
285
286InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
287{
288	// Dispatch
289	runCompute();
290
291	// Verify buffer contents
292	if (verifyResults())
293		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
294	else
295		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
296
297	return STOP;
298}
299
300void InterInvocationTestCase::runCompute (void)
301{
302	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
303	const int				groupsX	= m_workWidth / m_localWidth;
304	const int				groupsY	= m_workHeight / m_localHeight;
305
306	DE_ASSERT((m_workWidth % m_localWidth) == 0);
307	DE_ASSERT((m_workHeight % m_localHeight) == 0);
308
309	m_testCtx.getLog()
310		<< tcu::TestLog::Message
311		<< "Dispatching compute.\n"
312		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
313		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
314		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
315		<< tcu::TestLog::EndMessage;
316
317	gl.useProgram(m_program->getProgram());
318
319	// source
320	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
321	{
322		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
323		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
324	}
325	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
326	{
327		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
328		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
329		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
330
331		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
332	}
333	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
334	{
335		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
336		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
337	}
338	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
339	{
340		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
341		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
342
343		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
344
345		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
346	}
347	else
348		DE_ASSERT(DE_FALSE);
349
350	// destination
351	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
352	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
353
354	// dispatch
355	gl.dispatchCompute(groupsX, groupsY, 1);
356	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
357}
358
359bool InterInvocationTestCase::verifyResults (void)
360{
361	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
362	const int				errorFloodThreshold	= 5;
363	int						numErrorsLogged		= 0;
364	const void*				mapped				= DE_NULL;
365	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
366	bool					error				= false;
367
368	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
369	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
370	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
371
372	// copy to properly aligned array
373	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deInt32));
374
375	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
376		throw tcu::TestError("memory map store corrupted");
377
378	// check the results
379	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
380	{
381		if (results[ndx] != 1)
382		{
383			error = true;
384
385			if (numErrorsLogged == 0)
386				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
387			if (numErrorsLogged++ < errorFloodThreshold)
388				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
389			else
390			{
391				// after N errors, no point continuing verification
392				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
393				break;
394			}
395		}
396	}
397
398	if (!error)
399		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
400	return !error;
401}
402
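//! Returns GLSL source that makes the preceding writes of the shader main block visible to the
//! reads that follow: groupMemoryBarrier() + barrier() when synchronizing within the work group,
//! memoryBarrierImage() for single-invocation image access, and nothing for single-invocation
//! buffer access.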
403std::string InterInvocationTestCase::genBarrierSource (void) const
404{
405	std::ostringstream buf;
406
407	if (m_syncWithGroup)
408	{
409		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
410		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
411		//       we only require intra-workgroup synchronization.
412		buf << "\n"
413			<< "	groupMemoryBarrier();\n"
414			<< "	barrier();\n"
415			<< "\n";
416	}
417	else if (m_storage == STORAGE_BUFFER)
418	{
419		DE_ASSERT(!m_syncWithGroup);
420
421		// Waiting only for data written by this invocation. Since all buffer reads and writes are
422		// processed in order (within a single invocation), we don't have to do anything.
423		buf << "\n";
424	}
425	else if (m_storage == STORAGE_IMAGE)
426	{
427		DE_ASSERT(!m_syncWithGroup);
428
429		// Waiting only for data written by this invocation. But since image operations may complete
430		// in an undefined order, an explicit memoryBarrierImage() is required before reading them back.
431		buf << "\n"
432			<< "	memoryBarrierImage();\n"
433			<< "\n";
434	}
435	else
436		DE_ASSERT(DE_FALSE);
437
438	return buf.str();
439}
440
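//! Generates the common compute shader skeleton (result buffer, storage declarations and the
//! index/coordinate helper); derived cases supply the body of main() via genShaderMainBlock().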
441class InvocationBasicCase : public InterInvocationTestCase
442{
443public:
444							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
445private:
446	std::string				genShaderSource			(void) const;
447	virtual std::string		genShaderMainBlock		(void) const = 0;
448};
449
450InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
451	: InterInvocationTestCase(context, name, desc, storage, flags)
452{
453}
454
455std::string InvocationBasicCase::genShaderSource (void) const
456{
457	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
458	std::ostringstream	buf;
459
460	buf << "#version 310 es\n"
461		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
462		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
463		<< "layout(binding=0, std430) buffer Output\n"
464		<< "{\n"
465		<< "	highp int values[];\n"
466		<< "} sb_result;\n";
467
468	if (m_storage == STORAGE_BUFFER)
469		buf << "layout(binding=1, std430) coherent buffer Storage\n"
470			<< "{\n"
471			<< "	highp int values[];\n"
472			<< "} sb_store;\n"
473			<< "\n"
474			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
475			<< "{\n"
476			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
477			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
478			<< "}\n";
479	else if (m_storage == STORAGE_IMAGE)
480		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
481			<< "\n"
482			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
483			<< "{\n"
484			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
485			<< "}\n";
486	else
487		DE_ASSERT(DE_FALSE);
488
489	buf << "\n"
490		<< "void main (void)\n"
491		<< "{\n"
492		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
493		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
494		<< "	bool allOk      = true;\n"
495		<< "\n"
496		<< genShaderMainBlock()
497		<< "\n"
498		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
499		<< "}\n";
500
501	return buf.str();
502}
503
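//! Each invocation writes groupNdx to its elements, synchronizes, and then verifies the written
//! values (reading a nearby invocation's elements when synchronizing with the work group).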
504class InvocationWriteReadCase : public InvocationBasicCase
505{
506public:
507					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
508private:
509	std::string		genShaderMainBlock			(void) const;
510};
511
512InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
513	: InvocationBasicCase(context, name, desc, storage, flags)
514{
515}
516
517std::string InvocationWriteReadCase::genShaderMainBlock (void) const
518{
519	std::ostringstream buf;
520
521	// write
522
523	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
524	{
525		if (m_storage == STORAGE_BUFFER && m_useAtomic)
526			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
527		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
528			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
529		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
530			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
531		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
532			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
533		else
534			DE_ASSERT(DE_FALSE);
535	}
536
537	// barrier
538
539	buf << genBarrierSource();
540
541	// read
542
543	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
544	{
545		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
546
547		if (m_storage == STORAGE_BUFFER && m_useAtomic)
548			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
549		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
550			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
551		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
552			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
553		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
554			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
555		else
556			DE_ASSERT(DE_FALSE);
557	}
558
559	return buf.str();
560}
561
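//! Each invocation first verifies that its elements still contain the initial zero values
//! (possibly reading another invocation's elements when synchronizing with the work group),
//! synchronizes, and then writes groupNdx.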
562class InvocationReadWriteCase : public InvocationBasicCase
563{
564public:
565					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
566private:
567	std::string		genShaderMainBlock			(void) const;
568};
569
570InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
571	: InvocationBasicCase(context, name, desc, storage, flags)
572{
573}
574
575std::string InvocationReadWriteCase::genShaderMainBlock (void) const
576{
577	std::ostringstream buf;
578
579	// read
580
581	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
582	{
583		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
584
585		if (m_storage == STORAGE_BUFFER && m_useAtomic)
586			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
587		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
588			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
589		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
590			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
591		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
592			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
593		else
594			DE_ASSERT(DE_FALSE);
595	}
596
597	// barrier
598
599	buf << genBarrierSource();
600
601	// write
602
603	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
604	{
605		if (m_storage == STORAGE_BUFFER && m_useAtomic)
606			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
607		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
608			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
609		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
610			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
611		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
612			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
613		else
614			DE_ASSERT(DE_FALSE);
615	}
616
617	return buf.str();
618}
619
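//! Each invocation writes a dummy value (456), synchronizes, overwrites it with groupNdx
//! (possibly into another invocation's elements), synchronizes again, and verifies that the
//! final groupNdx value is observed.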
620class InvocationOverWriteCase : public InvocationBasicCase
621{
622public:
623					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
624private:
625	std::string		genShaderMainBlock			(void) const;
626};
627
628InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
629	: InvocationBasicCase(context, name, desc, storage, flags)
630{
631}
632
633std::string InvocationOverWriteCase::genShaderMainBlock (void) const
634{
635	std::ostringstream buf;
636
637	// write
638
639	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
640	{
641		if (m_storage == STORAGE_BUFFER && m_useAtomic)
642			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
643		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
644			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
645		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
646			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
647		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
648			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
649		else
650			DE_ASSERT(DE_FALSE);
651	}
652
653	// barrier
654
655	buf << genBarrierSource();
656
657	// write over
658
659	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
660	{
661		// write another invocation's value or our own value depending on test type
662		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
663
664		if (m_storage == STORAGE_BUFFER && m_useAtomic)
665			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
666		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
668		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
670		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
672		else
673			DE_ASSERT(DE_FALSE);
674	}
675
676	// barrier
677
678	buf << genBarrierSource();
679
680	// read
681
682	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683	{
684		// check another invocation's value or our own value depending on test type
685		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686
687		if (m_storage == STORAGE_BUFFER && m_useAtomic)
688			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
689		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
691		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
693		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
695		else
696			DE_ASSERT(DE_FALSE);
697	}
698
699	return buf.str();
700}
701
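//! Writes through one binding point and verifies the data through another binding point that
//! aliases the same buffer or image (FLAG_ALIASING_STORAGES is always set). In TYPE_OVERWRITE
//! mode a dummy value is first written through the first alias before the real value is written
//! through the second one.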
702class InvocationAliasWriteCase : public InterInvocationTestCase
703{
704public:
705	enum TestType
706	{
707		TYPE_WRITE = 0,
708		TYPE_OVERWRITE,
709
710		TYPE_LAST
711	};
712
713					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
714private:
715	std::string		genShaderSource				(void) const;
716
717	const TestType	m_type;
718};
719
720InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
721	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
722	, m_type					(type)
723{
724	DE_ASSERT(type < TYPE_LAST);
725}
726
727std::string InvocationAliasWriteCase::genShaderSource (void) const
728{
729	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
730	std::ostringstream	buf;
731
732	buf << "#version 310 es\n"
733		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
734		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
735		<< "layout(binding=0, std430) buffer Output\n"
736		<< "{\n"
737		<< "	highp int values[];\n"
738		<< "} sb_result;\n";
739
740	if (m_storage == STORAGE_BUFFER)
741		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
742			<< "{\n"
743			<< "	highp int values[];\n"
744			<< "} sb_store0;\n"
745			<< "layout(binding=2, std430) coherent buffer Storage1\n"
746			<< "{\n"
747			<< "	highp int values[];\n"
748			<< "} sb_store1;\n"
749			<< "\n"
750			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
751			<< "{\n"
752			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
753			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
754			<< "}\n";
755	else if (m_storage == STORAGE_IMAGE)
756		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
757			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
758			<< "\n"
759			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
760			<< "{\n"
761			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
762			<< "}\n";
763	else
764		DE_ASSERT(DE_FALSE);
765
766	buf << "\n"
767		<< "void main (void)\n"
768		<< "{\n"
769		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
770		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
771		<< "	bool allOk      = true;\n"
772		<< "\n";
773
774	if (m_type == TYPE_OVERWRITE)
775	{
776		// write
777
778		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
779		{
780			if (m_storage == STORAGE_BUFFER && m_useAtomic)
781				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
782			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
783				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
784			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
785				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
786			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
787				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
788			else
789				DE_ASSERT(DE_FALSE);
790		}
791
792		// barrier
793
794		buf << genBarrierSource();
795	}
796	else
797		DE_ASSERT(m_type == TYPE_WRITE);
798
799	// write (again)
800
801	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802	{
803		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
804
805		if (m_storage == STORAGE_BUFFER && m_useAtomic)
806			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
807		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
808			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
809		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
810			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
811		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
812			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
813		else
814			DE_ASSERT(DE_FALSE);
815	}
816
817	// barrier
818
819	buf << genBarrierSource();
820
821	// read
822
823	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
824	{
825		if (m_storage == STORAGE_BUFFER && m_useAtomic)
826			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
827		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
828			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
829		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
830			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
831		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
832			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
833		else
834			DE_ASSERT(DE_FALSE);
835	}
836
837	// return result
838
839	buf << "\n"
840		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
841		<< "}\n";
842
843	return buf.str();
844}
845
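// Plain data structs describing single steps of an inter-call (between compute dispatches)
// operation sequence; see InterCallOperations below for composing them into a test case.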
846namespace op
847{
848
849struct WriteData
850{
851	int targetHandle;
852	int seed;
853
854	static WriteData Generate(int targetHandle, int seed)
855	{
856		WriteData retVal;
857
858		retVal.targetHandle = targetHandle;
859		retVal.seed = seed;
860
861		return retVal;
862	}
863};
864
865struct ReadData
866{
867	int targetHandle;
868	int seed;
869
870	static ReadData Generate(int targetHandle, int seed)
871	{
872		ReadData retVal;
873
874		retVal.targetHandle = targetHandle;
875		retVal.seed = seed;
876
877		return retVal;
878	}
879};
880
881struct Barrier
882{
883};
884
885struct WriteDataInterleaved
886{
887	int		targetHandle;
888	int		seed;
889	bool	evenOdd;
890
891	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
892	{
893		WriteDataInterleaved retVal;
894
895		retVal.targetHandle = targetHandle;
896		retVal.seed = seed;
897		retVal.evenOdd = evenOdd;
898
899		return retVal;
900	}
901};
902
903struct ReadDataInterleaved
904{
905	int targetHandle;
906	int seed0;
907	int seed1;
908
909	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
910	{
911		ReadDataInterleaved retVal;
912
913		retVal.targetHandle = targetHandle;
914		retVal.seed0 = seed0;
915		retVal.seed1 = seed1;
916
917		return retVal;
918	}
919};
920
921struct ReadMultipleData
922{
923	int targetHandle0;
924	int seed0;
925	int targetHandle1;
926	int seed1;
927
928	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
929	{
930		ReadMultipleData retVal;
931
932		retVal.targetHandle0 = targetHandle0;
933		retVal.seed0 = seed0;
934		retVal.targetHandle1 = targetHandle1;
935		retVal.seed1 = seed1;
936
937		return retVal;
938	}
939};
940
941struct ReadZeroData
942{
943	int targetHandle;
944
945	static ReadZeroData Generate(int targetHandle)
946	{
947		ReadZeroData retVal;
948
949		retVal.targetHandle = targetHandle;
950
951		return retVal;
952	}
953};
954
955} // namespace op
956
957class InterCallTestCase;
958
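//! Ordered list of inter-call operations, composed with operator<<. For example:
//!
//!     InterCallOperations()
//!         << op::WriteData::Generate(1, 0x1234)
//!         << op::Barrier()
//!         << op::ReadData::Generate(1, 0x1234);
//!
//! The resulting operation list is consumed by InterCallTestCase.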
959class InterCallOperations
960{
961public:
962	InterCallOperations& operator<< (const op::WriteData&);
963	InterCallOperations& operator<< (const op::ReadData&);
964	InterCallOperations& operator<< (const op::Barrier&);
965	InterCallOperations& operator<< (const op::ReadMultipleData&);
966	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
967	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
968	InterCallOperations& operator<< (const op::ReadZeroData&);
969
970private:
971	struct Command
972	{
973		enum CommandType
974		{
975			TYPE_WRITE = 0,
976			TYPE_READ,
977			TYPE_BARRIER,
978			TYPE_READ_MULTIPLE,
979			TYPE_WRITE_INTERLEAVE,
980			TYPE_READ_INTERLEAVE,
981			TYPE_READ_ZERO,
982
983			TYPE_LAST
984		};
985
986		CommandType type;
987
988		union CommandUnion
989		{
990			op::WriteData				write;
991			op::ReadData				read;
992			op::Barrier					barrier;
993			op::ReadMultipleData		readMulti;
994			op::WriteDataInterleaved	writeInterleave;
995			op::ReadDataInterleaved		readInterleave;
996			op::ReadZeroData			readZero;
997		} u_cmd;
998	};
999
1000	friend class InterCallTestCase;
1001
1002	std::vector<Command> m_cmds;
1003};
1004
1005InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1006{
1007	m_cmds.push_back(Command());
1008	m_cmds.back().type = Command::TYPE_WRITE;
1009	m_cmds.back().u_cmd.write = cmd;
1010
1011	return *this;
1012}
1013
1014InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1015{
1016	m_cmds.push_back(Command());
1017	m_cmds.back().type = Command::TYPE_READ;
1018	m_cmds.back().u_cmd.read = cmd;
1019
1020	return *this;
1021}
1022
1023InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1024{
1025	m_cmds.push_back(Command());
1026	m_cmds.back().type = Command::TYPE_BARRIER;
1027	m_cmds.back().u_cmd.barrier = cmd;
1028
1029	return *this;
1030}
1031
1032InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1033{
1034	m_cmds.push_back(Command());
1035	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1036	m_cmds.back().u_cmd.readMulti = cmd;
1037
1038	return *this;
1039}
1040
1041InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1042{
1043	m_cmds.push_back(Command());
1044	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1045	m_cmds.back().u_cmd.writeInterleave = cmd;
1046
1047	return *this;
1048}
1049
1050InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1051{
1052	m_cmds.push_back(Command());
1053	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1054	m_cmds.back().u_cmd.readInterleave = cmd;
1055
1056	return *this;
1057}
1058
1059InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1060{
1061	m_cmds.push_back(Command());
1062	m_cmds.back().type = Command::TYPE_READ_ZERO;
1063	m_cmds.back().u_cmd.readZero = cmd;
1064
1065	return *this;
1066}
1067
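//! Executes the given inter-call operation sequence: each write/read command runs a separate
//! compute dispatch, barrier commands issue glMemoryBarrier() between dispatches, and every
//! read command writes per-invocation pass/fail flags into its own result buffer, which
//! verifyResults() checks afterwards.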
1068class InterCallTestCase : public TestCase
1069{
1070public:
1071	enum StorageType
1072	{
1073		STORAGE_BUFFER = 0,
1074		STORAGE_IMAGE,
1075
1076		STORAGE_LAST
1077	};
1078	enum Flags
1079	{
1080		FLAG_USE_ATOMIC	= 1,
1081		FLAG_USE_INT	= 2,
1082	};
1083													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1084													~InterCallTestCase			(void);
1085
1086private:
1087	void											init						(void);
1088	void											deinit						(void);
1089	IterateResult									iterate						(void);
1090	bool											verifyResults				(void);
1091
1092	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1093	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1094	void											runCommand					(const op::Barrier&);
1095	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1096	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1097	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1098	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1099	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1100
1101	glw::GLuint										genStorage					(int friendlyName);
1102	glw::GLuint										genResultStorage			(void);
1103	glu::ShaderProgram*								genWriteProgram				(int seed);
1104	glu::ShaderProgram*								genReadProgram				(int seed);
1105	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
1106	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
1107	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
1108	glu::ShaderProgram*								genReadZeroProgram			(void);
1109
1110	const StorageType								m_storage;
1111	const int										m_invocationGridSize;	//!< width and height of the two-dimensional work dispatch
1112	const int										m_perInvocationSize;	//!< number of elements accessed in a single invocation
1113	const std::vector<InterCallOperations::Command>	m_cmds;
1114	const bool										m_useAtomic;
1115	const bool										m_formatInteger;
1116
1117	std::vector<glu::ShaderProgram*>				m_operationPrograms;
1118	std::vector<glw::GLuint>						m_operationResultStorages;
1119	std::map<int, glw::GLuint>						m_storageIDs;
1120};
1121
1122InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1123	: TestCase					(context, name, desc)
1124	, m_storage					(storage)
1125	, m_invocationGridSize		(512)
1126	, m_perInvocationSize		(2)
1127	, m_cmds					(ops.m_cmds)
1128	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
1129	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
1130{
1131}
1132
1133InterCallTestCase::~InterCallTestCase (void)
1134{
1135	deinit();
1136}
1137
1138void InterCallTestCase::init (void)
1139{
1140	int programFriendlyName = 0;
1141
1142	// requirements
1143
1144	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1145		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1146
1147	// generate resources and validate command list
1148
1149	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1150	m_operationResultStorages.resize(m_cmds.size(), 0);
1151
1152	for (int step = 0; step < (int)m_cmds.size(); ++step)
1153	{
1154		switch (m_cmds[step].type)
1155		{
1156			case InterCallOperations::Command::TYPE_WRITE:
1157			{
1158				const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1159
1160				// new storage handle?
1161				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1162					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1163
1164				// program
1165				{
1166					glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1167
1168					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1169					m_testCtx.getLog() << *program;
1170
1171					if (!program->isOk())
1172						throw tcu::TestError("could not build program");
1173
1174					m_operationPrograms[step] = program;
1175				}
1176				break;
1177			}
1178
1179			case InterCallOperations::Command::TYPE_READ:
1180			{
1181				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1182				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1183
1184				// program and result storage
1185				{
1186					glu::ShaderProgram* program = genReadProgram(cmd.seed);
1187
1188					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1189					m_testCtx.getLog() << *program;
1190
1191					if (!program->isOk())
1192						throw tcu::TestError("could not build program");
1193
1194					m_operationPrograms[step] = program;
1195					m_operationResultStorages[step] = genResultStorage();
1196				}
1197				break;
1198			}
1199
1200			case InterCallOperations::Command::TYPE_BARRIER:
1201			{
1202				break;
1203			}
1204
1205			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1206			{
1207				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1208				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1209				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1210
1211				// program
1212				{
1213					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1214
1215					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1216					m_testCtx.getLog() << *program;
1217
1218					if (!program->isOk())
1219						throw tcu::TestError("could not build program");
1220
1221					m_operationPrograms[step] = program;
1222					m_operationResultStorages[step] = genResultStorage();
1223				}
1224				break;
1225			}
1226
1227			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1228			{
1229				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1230
1231				// new storage handle?
1232				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1233					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1234
1235				// program
1236				{
1237					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1238
1239					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1240					m_testCtx.getLog() << *program;
1241
1242					if (!program->isOk())
1243						throw tcu::TestError("could not build program");
1244
1245					m_operationPrograms[step] = program;
1246				}
1247				break;
1248			}
1249
1250			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1251			{
1252				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1253				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1254
1255				// program
1256				{
1257					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1258
1259					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1260					m_testCtx.getLog() << *program;
1261
1262					if (!program->isOk())
1263						throw tcu::TestError("could not build program");
1264
1265					m_operationPrograms[step] = program;
1266					m_operationResultStorages[step] = genResultStorage();
1267				}
1268				break;
1269			}
1270
1271			case InterCallOperations::Command::TYPE_READ_ZERO:
1272			{
1273				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1274
1275				// new storage handle?
1276				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1277					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1278
1279				// program
1280				{
1281					glu::ShaderProgram* program = genReadZeroProgram();
1282
1283					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1284					m_testCtx.getLog() << *program;
1285
1286					if (!program->isOk())
1287						throw tcu::TestError("could not build program");
1288
1289					m_operationPrograms[step] = program;
1290					m_operationResultStorages[step] = genResultStorage();
1291				}
1292				break;
1293			}
1294
1295			default:
1296				DE_ASSERT(DE_FALSE);
1297		}
1298	}
1299}
1300
1301void InterCallTestCase::deinit (void)
1302{
1303	// programs
1304	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1305		delete m_operationPrograms[ndx];
1306	m_operationPrograms.clear();
1307
1308	// result storages
1309	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1310	{
1311		if (m_operationResultStorages[ndx])
1312			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1313	}
1314	m_operationResultStorages.clear();
1315
1316	// storage
1317	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1318	{
1319		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1320
1321		if (m_storage == STORAGE_BUFFER)
1322			gl.deleteBuffers(1, &it->second);
1323		else if (m_storage == STORAGE_IMAGE)
1324			gl.deleteTextures(1, &it->second);
1325		else
1326			DE_ASSERT(DE_FALSE);
1327	}
1328	m_storageIDs.clear();
1329}
1330
1331InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1332{
1333	int programFriendlyName			= 0;
1334	int resultStorageFriendlyName	= 0;
1335
1336	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1337
1338	// run steps
1339
1340	for (int step = 0; step < (int)m_cmds.size(); ++step)
1341	{
1342		switch (m_cmds[step].type)
1343		{
1344			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
1345			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
1346			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
1347			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
1348			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
1349			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
1350			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
1351			default:
1352				DE_ASSERT(DE_FALSE);
1353		}
1354	}
1355
1356	// read results from result buffers
1357	if (verifyResults())
1358		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1359	else
1360		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1361
1362	return STOP;
1363}
1364
1365bool InterCallTestCase::verifyResults (void)
1366{
1367	int		resultBufferFriendlyName	= 0;
1368	bool	allResultsOk				= true;
1369	bool	anyResult					= false;
1370
1371	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1372
1373	for (int step = 0; step < (int)m_cmds.size(); ++step)
1374	{
1375		const int	errorFloodThreshold	= 5;
1376		int			numErrorsLogged		= 0;
1377
1378		if (m_operationResultStorages[step])
1379		{
1380			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1381			const void*				mapped	= DE_NULL;
1382			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
1383			bool					error	= false;
1384
1385			anyResult = true;
1386
1387			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1388			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1389			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1390
1391			// copy to properly aligned array
1392			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1393
1394			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1395				throw tcu::TestError("memory map store corrupted");
1396
1397			// check the results
1398			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1399			{
1400				if (results[ndx] != 1)
1401				{
1402					error = true;
1403
1404					if (numErrorsLogged == 0)
1405						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1406					if (numErrorsLogged++ < errorFloodThreshold)
1407						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1408					else
1409					{
1410						// after N errors, no point continuing verification
1411						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1412						break;
1413					}
1414				}
1415			}
1416
1417			if (error)
1418			{
1419				allResultsOk = false;
1420			}
1421			else
1422				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1423		}
1424	}
1425
1426	DE_ASSERT(anyResult);
1427	DE_UNREF(anyResult);
1428
1429	return allResultsOk;
1430}
1431
1432void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1433{
1434	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1435
1436	m_testCtx.getLog()
1437		<< tcu::TestLog::Message
1438		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1439		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1440		<< tcu::TestLog::EndMessage;
1441
1442	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1443
1444	// set destination
1445	if (m_storage == STORAGE_BUFFER)
1446	{
1447		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1448
1449		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1450		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1451	}
1452	else if (m_storage == STORAGE_IMAGE)
1453	{
1454		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1455
1456		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1457		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1458	}
1459	else
1460		DE_ASSERT(DE_FALSE);
1461
1462	// calc
1463	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1464	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1465}
1466
1467void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1468{
1469	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1470}
1471
1472void InterCallTestCase::runCommand (const op::Barrier& cmd)
1473{
1474	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1475
1476	DE_UNREF(cmd);
1477
1478	if (m_storage == STORAGE_BUFFER)
1479	{
1480		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1481		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1482	}
1483	else if (m_storage == STORAGE_IMAGE)
1484	{
1485		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1486		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1487	}
1488	else
1489		DE_ASSERT(DE_FALSE);
1490}
1491
1492void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1493{
1494	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1495
1496	m_testCtx.getLog()
1497		<< tcu::TestLog::Message
1498		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1499		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1500		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1501		<< tcu::TestLog::EndMessage;
1502
1503	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1504
1505	// set sources
1506	if (m_storage == STORAGE_BUFFER)
1507	{
1508		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1509		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1510
1511		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1512		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1513		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1514	}
1515	else if (m_storage == STORAGE_IMAGE)
1516	{
1517		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1518		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1519
1520		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1521		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1522		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1523	}
1524	else
1525		DE_ASSERT(DE_FALSE);
1526
1527	// set destination
1528	DE_ASSERT(m_operationResultStorages[stepNdx]);
1529	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1530	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1531
1532	// calc
1533	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1534	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1535}
1536
1537void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1538{
1539	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1540
1541	m_testCtx.getLog()
1542		<< tcu::TestLog::Message
1543		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1544		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1545		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1546		<< tcu::TestLog::EndMessage;
1547
1548	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1549
1550	// set destination
1551	if (m_storage == STORAGE_BUFFER)
1552	{
1553		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1554
1555		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1556		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1557	}
1558	else if (m_storage == STORAGE_IMAGE)
1559	{
1560		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1561
1562		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1563		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1564	}
1565	else
1566		DE_ASSERT(DE_FALSE);
1567
1568	// calc
1569	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1570	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1571}
1572
1573void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1574{
1575	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1576}
1577
1578void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1579{
1580	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1581}
1582
1583void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1584{
1585	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1586
1587	m_testCtx.getLog()
1588		<< tcu::TestLog::Message
1589		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1590		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1591		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1592		<< tcu::TestLog::EndMessage;
1593
1594	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1595
1596	// set source
1597	if (m_storage == STORAGE_BUFFER)
1598	{
1599		DE_ASSERT(m_storageIDs[targetHandle]);
1600
1601		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1602		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1603	}
1604	else if (m_storage == STORAGE_IMAGE)
1605	{
1606		DE_ASSERT(m_storageIDs[targetHandle]);
1607
1608		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1609		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1610	}
1611	else
1612		DE_ASSERT(DE_FALSE);
1613
1614	// set destination
1615	DE_ASSERT(m_operationResultStorages[stepNdx]);
1616	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1617	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1618
1619	// calc
1620	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1621	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1622}
1623
1624glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1625{
1626	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1627
1628	if (m_storage == STORAGE_BUFFER)
1629	{
1630		const int		numElements		= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1631		const int		bufferSize		= numElements * ((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1632		glw::GLuint		retVal			= 0;
1633
1634		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1635
1636		gl.genBuffers(1, &retVal);
1637		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1638
1639		if (m_formatInteger)
1640		{
1641			const std::vector<deUint32> zeroBuffer(numElements, 0);
1642			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1643		}
1644		else
1645		{
1646			const std::vector<float> zeroBuffer(numElements, 0.0f);
1647			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1648		}
1649		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1650
1651		return retVal;
1652	}
1653	else if (m_storage == STORAGE_IMAGE)
1654	{
1655		const int	imageWidth	= m_invocationGridSize;
1656		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
1657		glw::GLuint	retVal		= 0;
1658
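		// The image is laid out as m_perInvocationSize bands of m_invocationGridSize rows stacked
		// vertically: element k of invocation (x, y) lives on row y + k * m_invocationGridSize,
		// with the write programs offsetting the x coordinate by a seed-dependent amount within
		// the band.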
1659		m_testCtx.getLog()
1660			<< tcu::TestLog::Message
1661			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1662			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1663			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1664			<< tcu::TestLog::EndMessage;
1665
1666		gl.genTextures(1, &retVal);
1667		gl.bindTexture(GL_TEXTURE_2D, retVal);
1668
1669		if (m_formatInteger)
1670			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1671		else
1672			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1673
1674		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1675		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1676		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1677
1678		m_testCtx.getLog()
1679			<< tcu::TestLog::Message
1680			<< "Filling image with 0"
1681			<< tcu::TestLog::EndMessage;
1682
1683		if (m_formatInteger)
1684		{
1685			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1686			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1687		}
1688		else
1689		{
1690			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1691			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1692		}
1693
1694		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1695
1696		return retVal;
1697	}
1698	else
1699	{
1700		DE_ASSERT(DE_FALSE);
1701		return 0;
1702	}
1703}
1704
1705glw::GLuint InterCallTestCase::genResultStorage (void)
1706{
1707	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1708	glw::GLuint				retVal	= 0;
1709
1710	gl.genBuffers(1, &retVal);
1711	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1712	gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1713	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1714
1715	return retVal;
1716}
1717
1718glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1719{
1720	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1721	std::ostringstream	buf;
1722
1723	buf << "#version 310 es\n"
1724		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1725		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1726
1727	if (m_storage == STORAGE_BUFFER)
1728		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1729			<< "{\n"
1730			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1731			<< "} sb_out;\n";
1732	else if (m_storage == STORAGE_IMAGE)
1733		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1734	else
1735		DE_ASSERT(DE_FALSE);
1736
1737	buf << "\n"
1738		<< "void main (void)\n"
1739		<< "{\n"
1740		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1741		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1742		<< "\n";
1743
1744	// Write to buffer/image m_perInvocationSize elements
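	// As a sketch of the generated code (assumed, illustrative parameters only: buffer storage,
	// no atomics, integer format, seed = 1, m_invocationGridSize = 8, m_perInvocationSize = 2),
	// the loop below emits
	//     sb_out.values[(groupNdx + 1) % 128] = int(groupNdx);
	//     sb_out.values[(groupNdx + 65) % 128] = int(groupNdx);
	// i.e. each invocation writes its groupNdx to m_perInvocationSize distinct elements.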
1745	if (m_storage == STORAGE_BUFFER)
1746	{
1747		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1748		{
1749			if (m_useAtomic)
1750				buf << "	atomicExchange(";
1751			else
1752				buf << "	";
1753
1754			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1755
1756			if (m_useAtomic)
1757				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1758			else
1759				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1760		}
1761	}
1762	else if (m_storage == STORAGE_IMAGE)
1763	{
1764		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1765		{
1766			if (m_useAtomic)
1767				buf << "	imageAtomicExchange";
1768			else
1769				buf << "	imageStore";
1770
1771			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1772
1773			if (m_useAtomic)
1774				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1775			else
1776				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1777		}
1778	}
1779	else
1780		DE_ASSERT(DE_FALSE);
1781
1782	buf << "}\n";
1783
1784	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1785}
1786
1787glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1788{
1789	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1790	std::ostringstream	buf;
1791
1792	buf << "#version 310 es\n"
1793		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1794		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1795
1796	if (m_storage == STORAGE_BUFFER)
1797		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1798			<< "{\n"
1799			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1800			<< "} sb_in;\n";
1801	else if (m_storage == STORAGE_IMAGE)
1802		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1803	else
1804		DE_ASSERT(DE_FALSE);
1805
1806	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1807		<< "{\n"
1808		<< "	highp int resultOk[];\n"
1809		<< "} sb_result;\n"
1810		<< "\n"
1811		<< "void main (void)\n"
1812		<< "{\n"
1813		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1814		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1815		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1816		<< "	bool allOk = true;\n"
1817		<< "\n";
1818
1819	// Verify data
1820
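	// With the same illustrative parameters as in genWriteProgram (buffer storage, no atomics,
	// integer format, seed = 1, grid size 8, two reads per invocation), the generated checks
	// look like
	//     allOk = allOk && (sb_in.values[(groupNdx + 1) % 128] == int(groupNdx));
	// i.e. they re-check the pattern a write program using the same seed would have produced.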
1821	if (m_storage == STORAGE_BUFFER)
1822	{
1823		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1824		{
1825			if (!m_useAtomic)
1826				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
1827					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1828					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1829			else
1830				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1831					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1832					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1833		}
1834	}
1835	else if (m_storage == STORAGE_IMAGE)
1836	{
1837		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1838		{
1839			if (!m_useAtomic)
1840				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1841					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1842					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1843			else
1844				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1845					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1846					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1847		}
1848	}
1849	else
1850		DE_ASSERT(DE_FALSE);
1851
1852	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1853		<< "}\n";
1854
1855	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1856}
1857
1858glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1859{
1860	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1861	std::ostringstream	buf;
1862
1863	buf << "#version 310 es\n"
1864		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1865		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1866
1867	if (m_storage == STORAGE_BUFFER)
1868		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1869			<< "{\n"
1870			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1871			<< "} sb_in0;\n"
1872			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1873			<< "{\n"
1874			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1875			<< "} sb_in1;\n";
1876	else if (m_storage == STORAGE_IMAGE)
1877		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1878			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1879	else
1880		DE_ASSERT(DE_FALSE);
1881
1882	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1883		<< "{\n"
1884		<< "	highp int resultOk[];\n"
1885		<< "} sb_result;\n"
1886		<< "\n"
1887		<< "void main (void)\n"
1888		<< "{\n"
1889		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1890		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1891		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1892		<< "	bool allOk = true;\n"
1893		<< "\n";
1894
1895	// Verify data
1896
1897	if (m_storage == STORAGE_BUFFER)
1898	{
1899		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1900			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1901				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1902	}
1903	else if (m_storage == STORAGE_IMAGE)
1904	{
1905		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1906			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1907				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1908	}
1909	else
1910		DE_ASSERT(DE_FALSE);
1911
1912	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1913		<< "}\n";
1914
1915	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1916}
1917
1918glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1919{
1920	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1921	std::ostringstream	buf;
1922
1923	buf << "#version 310 es\n"
1924		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1925		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1926
1927	if (m_storage == STORAGE_BUFFER)
1928		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1929			<< "{\n"
1930			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1931			<< "} sb_out;\n";
1932	else if (m_storage == STORAGE_IMAGE)
1933		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1934	else
1935		DE_ASSERT(DE_FALSE);
1936
1937	buf << "\n"
1938		<< "void main (void)\n"
1939		<< "{\n"
1940		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1941		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1942		<< "\n";
1943
1944	// Write to buffer/image m_perInvocationSize elements
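	// Interleaving sketch (assumed, illustrative parameters only: buffer storage, no atomics,
	// integer format, seed = 1, grid size 8, m_perInvocationSize = 2, evenOdd = true): the loop
	// below emits
	//     sb_out.values[((groupNdx + 1) % 64) * 2 + 0] = int(groupNdx);
	//     sb_out.values[((groupNdx + 33) % 64) * 2 + 0] = int(groupNdx);
	// so this variant touches only even-indexed elements; the evenOdd = false variant touches
	// only odd-indexed ones.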
1945	if (m_storage == STORAGE_BUFFER)
1946	{
1947		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1948		{
1949			if (m_useAtomic)
1950				buf << "	atomicExchange(";
1951			else
1952				buf << "	";
1953
1954			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1955
1956			if (m_useAtomic)
1957				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1958			else
				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1960		}
1961	}
1962	else if (m_storage == STORAGE_IMAGE)
1963	{
1964		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1965		{
1966			if (m_useAtomic)
1967				buf << "	imageAtomicExchange";
1968			else
1969				buf << "	imageStore";
1970
1971			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1972
1973			if (m_useAtomic)
1974				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1975			else
1976				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1977		}
1978	}
1979	else
1980		DE_ASSERT(DE_FALSE);
1981
1982	buf << "}\n";
1983
1984	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
1985}
1986
1987glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
1988{
1989	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1990	std::ostringstream	buf;
1991
1992	buf << "#version 310 es\n"
1993		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
1994		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1995
1996	if (m_storage == STORAGE_BUFFER)
1997		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1998			<< "{\n"
1999			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2000			<< "} sb_in;\n";
2001	else if (m_storage == STORAGE_IMAGE)
2002		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2003	else
2004		DE_ASSERT(DE_FALSE);
2005
2006	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2007		<< "{\n"
2008		<< "	highp int resultOk[];\n"
2009		<< "} sb_result;\n"
2010		<< "\n"
2011		<< "void main (void)\n"
2012		<< "{\n"
2013		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2014		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2015		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2016		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2017		<< "	bool allOk = true;\n"
2018		<< "\n";
2019
2020	// Verify data
2021
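	// interleavedGroupNdx maps each pair of adjacent invocations to the index the half-width
	// write dispatch would have computed for the same element. For example, with an assumed
	// 8x8 read dispatch, invocations x = 4 and x = 5 on row y = 0 both get
	// interleavedGroupNdx = (8 >> 1) * 8 * 0 + (8 >> 1) * 0 + (4 >> 1) = 2, i.e. groupNdx 2 of
	// the corresponding 4x8 write pass.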
2022	if (m_storage == STORAGE_BUFFER)
2023	{
2024		buf << "	if (groupNdx % 2 == 0)\n"
2025			<< "	{\n";
2026		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2027			buf << "		allOk = allOk && ("
2028				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2029				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2030		buf << "	}\n"
2031			<< "	else\n"
2032			<< "	{\n";
2033		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2034			buf << "		allOk = allOk && ("
2035				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2036				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2037		buf << "	}\n";
2038	}
2039	else if (m_storage == STORAGE_IMAGE)
2040	{
2041		buf << "	if (groupNdx % 2 == 0)\n"
2042			<< "	{\n";
2043		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2044			buf << "		allOk = allOk && ("
2045				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2046				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2047				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2048		buf << "	}\n"
2049			<< "	else\n"
2050			<< "	{\n";
2051		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2052			buf << "		allOk = allOk && ("
2053				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2054				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2055				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2056		buf << "	}\n";
2057	}
2058	else
2059		DE_ASSERT(DE_FALSE);
2060
2061	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2062		<< "}\n";
2063
2064	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2065}
2066
2067glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
2068{
2069	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2070	std::ostringstream	buf;
2071
2072	buf << "#version 310 es\n"
2073		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
2074		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2075
2076	if (m_storage == STORAGE_BUFFER)
2077		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2078			<< "{\n"
2079			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2080			<< "} sb_in;\n";
2081	else if (m_storage == STORAGE_IMAGE)
2082		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2083	else
2084		DE_ASSERT(DE_FALSE);
2085
2086	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2087		<< "{\n"
2088		<< "	highp int resultOk[];\n"
2089		<< "} sb_result;\n"
2090		<< "\n"
2091		<< "void main (void)\n"
2092		<< "{\n"
2093		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2094		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2095		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2096		<< "	bool allOk = true;\n"
2097		<< "\n";
2098
2099	// Verify data
2100
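	// In the atomic variant the read is an atomicExchange that swaps in the dummy value
	// 'anything'; only the returned old value is compared against zero. Either way the check
	// passes only if the storage still holds its initial zero fill.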
2101	if (m_storage == STORAGE_BUFFER)
2102	{
2103		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2104			buf << "	allOk = allOk && ("
2105				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2106				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2107	}
2108	else if (m_storage == STORAGE_IMAGE)
2109	{
2110		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2111			buf << "	allOk = allOk && ("
2112			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2113			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2114	}
2115	else
2116		DE_ASSERT(DE_FALSE);
2117
2118	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2119		<< "}\n";
2120
2121	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
2122}
2123
2124class SSBOConcurrentAtomicCase : public TestCase
2125{
2126public:
2127
2128							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2129							~SSBOConcurrentAtomicCase	(void);
2130
2131	void					init						(void);
2132	void					deinit						(void);
2133	IterateResult			iterate						(void);
2134
2135private:
2136	std::string				genComputeSource			(void) const;
2137
2138	const int				m_numCalls;
2139	const int				m_workSize;
2140	glu::ShaderProgram*		m_program;
2141	deUint32				m_bufferID;
2142	std::vector<deUint32>	m_intermediateResultBuffers;
2143};
2144
2145SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2146	: TestCase		(context, name, description)
2147	, m_numCalls	(numCalls)
2148	, m_workSize	(workSize)
2149	, m_program		(DE_NULL)
2150	, m_bufferID	(DE_NULL)
2151{
2152}
2153
2154SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2155{
2156	deinit();
2157}
2158
2159void SSBOConcurrentAtomicCase::init (void)
2160{
2161	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2162	std::vector<deUint32>	zeroData			(m_workSize, 0);
2163
2164	// gen buffers
2165
2166	gl.genBuffers(1, &m_bufferID);
2167	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2168	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2169
2170	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2171	{
2172		deUint32 buffer = 0;
2173
2174		gl.genBuffers(1, &buffer);
2175		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2176		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2177
2178		m_intermediateResultBuffers.push_back(buffer);
2179		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2180	}
2181
2182	// gen program
2183
2184	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2185	m_testCtx.getLog() << *m_program;
2186	if (!m_program->isOk())
2187		throw tcu::TestError("could not build program");
2188}
2189
2190void SSBOConcurrentAtomicCase::deinit (void)
2191{
2192	if (m_bufferID)
2193	{
2194		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2195		m_bufferID = 0;
2196	}
2197
2198	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2199		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2200	m_intermediateResultBuffers.clear();
2201
2202	delete m_program;
2203	m_program = DE_NULL;
2204}
2205
2206TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2207{
2208	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2209	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2210	std::vector<int>		deltas;
2211
2212	// generate unique deltas
2213	generateShuffledRamp(m_numCalls, deltas);
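	// e.g. m_numCalls = 4: deltas is some permutation of {1, 2, 3, 4} and
	// sumValue = 4 * 5 / 2 = 10, the value every element of the work buffer should hold once
	// all calls have completed.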
2214
2215	// invoke program N times, each with a different delta
2216	{
2217		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2218
2219		m_testCtx.getLog()
2220			<< tcu::TestLog::Message
2221			<< "Running shader " << m_numCalls << " times.\n"
2222			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2223			<< "Setting u_atomicDelta to a unique value for each call.\n"
2224			<< tcu::TestLog::EndMessage;
2225
2226		if (deltaLocation == -1)
2227			throw tcu::TestError("u_atomicDelta location was -1");
2228
2229		gl.useProgram(m_program->getProgram());
2230		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2231
2232		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2233		{
2234			m_testCtx.getLog()
2235				<< tcu::TestLog::Message
2236				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2237				<< tcu::TestLog::EndMessage;
2238
2239			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2240			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2241			gl.dispatchCompute(m_workSize, 1, 1);
2242		}
2243
2244		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2245	}
2246
2247	// Verify result
2248	{
2249		std::vector<deUint32> result;
2250
2251		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2252
2253		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2254		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2255
2256		for (int ndx = 0; ndx < m_workSize; ++ndx)
2257		{
2258			if (result[ndx] != sumValue)
2259			{
2260				m_testCtx.getLog()
2261					<< tcu::TestLog::Message
2262					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2263					<< "Work buffer contains invalid values."
2264					<< tcu::TestLog::EndMessage;
2265
2266				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2267				return STOP;
2268			}
2269		}
2270
2271		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2272	}
2273
2274	// verify steps
2275	{
2276		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2277		std::vector<deUint32>				valueChain			(m_numCalls);
2278
2279		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2280
2281		// collect results
2282
2283		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2284		{
2285			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2286			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2287		}
2288
2289		// verify values
2290
2291		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2292		{
2293			int			invalidOperationNdx;
2294			deUint32	errorDelta;
2295			deUint32	errorExpected;
2296
2297			// collect result chain for each element
2298			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2299				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2300
			// check that a path exists from 0 to sumValue that uses each addition exactly once:
			// decompose the cumulative results into individual addition operations (all additions are positive, so this works)
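			// For example (illustrative only): with m_numCalls = 3 and an element whose
			// atomicAdds happened to apply +2, +3, +1 in that order, the returned old values
			// are {0, 2, 5} and the final value is 6. Successive differences of the sorted
			// chain (with sumValue closing the chain) give {2, 3, 1}, which sort to the
			// expected ramp {1, 2, 3}.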
2303
2304			std::sort(valueChain.begin(), valueChain.end());
2305
2306			// validate chain
2307			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2308			{
2309				m_testCtx.getLog()
2310					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ": value was increased by " << errorDelta << ", but the expected increment was " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << ":\n"
2313					<< tcu::TestLog::EndMessage;
2314
2315				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2316					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2317				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2318
2319				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2320				return STOP;
2321			}
2322		}
2323
2324		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2325	}
2326
2327	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2328	return STOP;
2329}
2330
2331std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2332{
2333	std::ostringstream buf;
2334
2335	buf	<< "#version 310 es\n"
2336		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2337		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2338		<< "{\n"
2339		<< "	highp uint values[" << m_workSize << "];\n"
2340		<< "} sb_ires;\n"
2341		<< "\n"
2342		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2343		<< "{\n"
2344		<< "	highp uint values[" << m_workSize << "];\n"
2345		<< "} sb_work;\n"
2346		<< "uniform highp uint u_atomicDelta;\n"
2347		<< "\n"
2348		<< "void main ()\n"
2349		<< "{\n"
2350		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2351		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2352		<< "}";
2353
2354	return buf.str();
2355}
2356
2357class ConcurrentAtomicCounterCase : public TestCase
2358{
2359public:
2360
2361							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
2362							~ConcurrentAtomicCounterCase	(void);
2363
2364	void					init							(void);
2365	void					deinit							(void);
2366	IterateResult			iterate							(void);
2367
2368private:
2369	std::string				genComputeSource				(bool evenOdd) const;
2370
2371	const int				m_numCalls;
2372	const int				m_workSize;
2373	glu::ShaderProgram*		m_evenProgram;
2374	glu::ShaderProgram*		m_oddProgram;
2375	deUint32				m_counterBuffer;
2376	deUint32				m_intermediateResultBuffer;
2377};
2378
2379ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2380	: TestCase					(context, name, description)
2381	, m_numCalls				(numCalls)
2382	, m_workSize				(workSize)
2383	, m_evenProgram				(DE_NULL)
2384	, m_oddProgram				(DE_NULL)
2385	, m_counterBuffer			(DE_NULL)
2386	, m_intermediateResultBuffer(DE_NULL)
2387{
2388}
2389
2390ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2391{
2392	deinit();
2393}
2394
2395void ConcurrentAtomicCounterCase::init (void)
2396{
2397	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2398	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);
2399
2400	// gen buffer
2401
2402	gl.genBuffers(1, &m_counterBuffer);
2403	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2404	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2405
2406	gl.genBuffers(1, &m_intermediateResultBuffer);
2407	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2408	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2409
2410	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2411
2412	// gen programs
2413
2414	{
2415		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2416
2417		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2418		m_testCtx.getLog() << *m_evenProgram;
2419		if (!m_evenProgram->isOk())
2420			throw tcu::TestError("could not build program");
2421	}
2422	{
2423		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2424
2425		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2426		m_testCtx.getLog() << *m_oddProgram;
2427		if (!m_oddProgram->isOk())
2428			throw tcu::TestError("could not build program");
2429	}
2430}
2431
2432void ConcurrentAtomicCounterCase::deinit (void)
2433{
2434	if (m_counterBuffer)
2435	{
2436		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2437		m_counterBuffer = 0;
2438	}
2439	if (m_intermediateResultBuffer)
2440	{
2441		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2442		m_intermediateResultBuffer = 0;
2443	}
2444
2445	delete m_evenProgram;
2446	m_evenProgram = DE_NULL;
2447
2448	delete m_oddProgram;
2449	m_oddProgram = DE_NULL;
2450}
2451
2452TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2453{
2454	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2455
2456	// invoke program N times, each with a different delta
2457	{
2458		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2459		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2460
2461		m_testCtx.getLog()
2462			<< tcu::TestLog::Message
2463			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2464			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2465			<< tcu::TestLog::EndMessage;
2466
2467		if (evenCallNdxLocation == -1)
2468			throw tcu::TestError("u_callNdx location was -1");
2469		if (oddCallNdxLocation == -1)
2470			throw tcu::TestError("u_callNdx location was -1");
2471
2472		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2473		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
2474
2475		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2476		{
2477			gl.useProgram(m_evenProgram->getProgram());
2478			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2479			gl.dispatchCompute(m_workSize, 1, 1);
2480
2481			gl.useProgram(m_oddProgram->getProgram());
2482			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2483			gl.dispatchCompute(m_workSize, 1, 1);
2484		}
2485
2486		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2487	}
2488
2489	// Verify result
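	// Each (even, odd) program pair increments the counter exactly once per dataNdx in
	// [callNdx * workSize, (callNdx + 1) * workSize), since every index is either even or odd,
	// so after m_numCalls pairs the counter should read m_numCalls * m_workSize
	// (e.g. numCalls = 2, workSize = 3 gives 6 increments).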
2490	{
2491		deUint32 result;
2492
		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying counter buffer, it should contain the value " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2494
2495		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2496		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2497
2498		if ((int)result != m_numCalls*m_workSize)
2499		{
2500			m_testCtx.getLog()
2501				<< tcu::TestLog::Message
2502				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2503				<< tcu::TestLog::EndMessage;
2504
2505			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2506			return STOP;
2507		}
2508
2509		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2510	}
2511
2512	// verify steps
2513	{
2514		std::vector<deUint32> intermediateResults;
2515
2516		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2517
2518		// collect results
2519
2520		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2521		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2522
2523		// verify values
2524
2525		std::sort(intermediateResults.begin(), intermediateResults.end());
2526
2527		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2528		{
2529			if ((int)intermediateResults[valueNdx] != valueNdx)
2530			{
2531				m_testCtx.getLog()
2532					<< tcu::TestLog::Message
2533					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2534					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
2535					<< tcu::TestLog::EndMessage;
2536
2537				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2538					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2539
2540				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2541				return STOP;
2542			}
2543		}
2544
2545		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2546	}
2547
2548	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2549	return STOP;
2550}
2551
2552std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2553{
2554	std::ostringstream buf;
2555
2556	buf	<< "#version 310 es\n"
2557		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2558		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2559		<< "{\n"
2560		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
2561		<< "} sb_ires;\n"
2562		<< "\n"
2563		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
2564		<< "uniform highp uint u_callNdx;\n"
2565		<< "\n"
2566		<< "void main ()\n"
2567		<< "{\n"
2568		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2569		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2570		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2571		<< "}";
2572
2573	return buf.str();
2574}
2575
2576class ConcurrentImageAtomicCase : public TestCase
2577{
2578public:
2579
2580							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2581							~ConcurrentImageAtomicCase	(void);
2582
2583	void					init						(void);
2584	void					deinit						(void);
2585	IterateResult			iterate						(void);
2586
2587private:
2588	void					readWorkImage				(std::vector<deUint32>& result);
2589
2590	std::string				genComputeSource			(void) const;
2591	std::string				genImageReadSource			(void) const;
2592	std::string				genImageClearSource			(void) const;
2593
2594	const int				m_numCalls;
2595	const int				m_workSize;
2596	glu::ShaderProgram*		m_program;
2597	glu::ShaderProgram*		m_imageReadProgram;
2598	glu::ShaderProgram*		m_imageClearProgram;
2599	deUint32				m_imageID;
2600	std::vector<deUint32>	m_intermediateResultBuffers;
2601};
2602
2603ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2604	: TestCase				(context, name, description)
2605	, m_numCalls			(numCalls)
2606	, m_workSize			(workSize)
2607	, m_program				(DE_NULL)
2608	, m_imageReadProgram	(DE_NULL)
2609	, m_imageClearProgram	(DE_NULL)
2610	, m_imageID				(DE_NULL)
2611{
2612}
2613
2614ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2615{
2616	deinit();
2617}
2618
2619void ConcurrentImageAtomicCase::init (void)
2620{
2621	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2622	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
2623
2624	if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
2625		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2626
2627	// gen image
2628
2629	gl.genTextures(1, &m_imageID);
2630	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2631	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2632	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2633	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2634	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2635
2636	// gen buffers
2637
2638	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2639	{
2640		deUint32 buffer = 0;
2641
2642		gl.genBuffers(1, &buffer);
2643		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2644		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2645
2646		m_intermediateResultBuffers.push_back(buffer);
2647		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2648	}
2649
2650	// gen programs
2651
2652	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2653	m_testCtx.getLog() << *m_program;
2654	if (!m_program->isOk())
2655		throw tcu::TestError("could not build program");
2656
2657	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2658	if (!m_imageReadProgram->isOk())
2659	{
2660		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2661
2662		m_testCtx.getLog() << *m_imageReadProgram;
2663		throw tcu::TestError("could not build program");
2664	}
2665
2666	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2667	if (!m_imageClearProgram->isOk())
2668	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image clear program");
2670
2671		m_testCtx.getLog() << *m_imageClearProgram;
2672		throw tcu::TestError("could not build program");
2673	}
2674}
2675
2676void ConcurrentImageAtomicCase::deinit (void)
2677{
2678	if (m_imageID)
2679	{
2680		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2681		m_imageID = 0;
2682	}
2683
2684	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2685		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2686	m_intermediateResultBuffers.clear();
2687
2688	delete m_program;
2689	m_program = DE_NULL;
2690
2691	delete m_imageReadProgram;
2692	m_imageReadProgram = DE_NULL;
2693
2694	delete m_imageClearProgram;
2695	m_imageClearProgram = DE_NULL;
2696}
2697
2698TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2699{
2700	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2701	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2702	std::vector<int>		deltas;
2703
2704	// generate unique deltas
2705	generateShuffledRamp(m_numCalls, deltas);
2706
2707	// clear image
2708	{
2709		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2710
2711		gl.useProgram(m_imageClearProgram->getProgram());
2712		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2713		gl.dispatchCompute(m_workSize, m_workSize, 1);
2714		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2715
2716		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2717	}
2718
2719	// invoke program N times, each with a different delta
2720	{
2721		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2722
2723		m_testCtx.getLog()
2724			<< tcu::TestLog::Message
2725			<< "Running shader " << m_numCalls << " times.\n"
2726			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2727			<< "Setting u_atomicDelta to a unique value for each call.\n"
2728			<< tcu::TestLog::EndMessage;
2729
2730		if (deltaLocation == -1)
2731			throw tcu::TestError("u_atomicDelta location was -1");
2732
2733		gl.useProgram(m_program->getProgram());
2734		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2735
2736		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2737		{
2738			m_testCtx.getLog()
2739				<< tcu::TestLog::Message
2740				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2741				<< tcu::TestLog::EndMessage;
2742
2743			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2744			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2745			gl.dispatchCompute(m_workSize, m_workSize, 1);
2746		}
2747
2748		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2749	}
2750
2751	// Verify result
2752	{
2753		std::vector<deUint32> result;
2754
2755		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2756
2757		readWorkImage(result);
2758
2759		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2760		{
2761			if (result[ndx] != sumValue)
2762			{
2763				m_testCtx.getLog()
2764					<< tcu::TestLog::Message
2765					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2766					<< "Work image contains invalid values."
2767					<< tcu::TestLog::EndMessage;
2768
2769				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2770				return STOP;
2771			}
2772		}
2773
2774		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2775	}
2776
2777	// verify steps
2778	{
2779		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2780		std::vector<deUint32>				valueChain			(m_numCalls);
2782
2783		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2784
2785		// collect results
2786
2787		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2788		{
2789			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2790			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2791		}
2792
2793		// verify values
2794
		for (int valueNdx = 0; valueNdx < m_workSize * m_workSize; ++valueNdx)
2796		{
2797			int			invalidOperationNdx;
2798			deUint32	errorDelta;
2799			deUint32	errorExpected;
2800
2801			// collect result chain for each element
2802			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2803				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2804
			// check that a path exists from 0 to sumValue that uses each addition exactly once:
			// decompose the cumulative results into individual addition operations (all additions are positive, so this works)
2807
2808			std::sort(valueChain.begin(), valueChain.end());
2809
2816			// validate chain
2817			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2818			{
2819				m_testCtx.getLog()
2820					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
					<< invalidOperationNdx << ": value was increased by " << errorDelta << ", but the expected increment was " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "):\n"
2824					<< tcu::TestLog::EndMessage;
2825
2826				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2827					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2828				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2829
2830				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2831				return STOP;
2832			}
2833		}
2834
2835		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2836	}
2837
2838	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2839	return STOP;
2840}
2841
2842void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2843{
2844	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2845	glu::Buffer				resultBuffer	(m_context.getRenderContext());
2846
2847	// Read image to an ssbo
2848
2849	{
2850		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2851
2852		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2853		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2854
2855		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2856		gl.useProgram(m_imageReadProgram->getProgram());
2857
2858		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2859		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2860		gl.dispatchCompute(m_workSize, m_workSize, 1);
2861
2862		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2863	}
2864
2865	// Read ssbo
2866	{
2867		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2868		GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2869
2870		if (!ptr)
2871			throw tcu::TestError("mapBufferRange returned NULL");
2872
2873		result.resize(m_workSize * m_workSize);
2874		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2875
2876		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2877			throw tcu::TestError("unmapBuffer returned false");
2878	}
2879}
2880
2881std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2882{
2883	std::ostringstream buf;
2884
2885	buf	<< "#version 310 es\n"
2886		<< "#extension GL_OES_shader_image_atomic : require\n"
2887		<< "\n"
2888		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2889		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2890		<< "{\n"
2891		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2892		<< "} sb_ires;\n"
2893		<< "\n"
2894		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2895		<< "uniform highp uint u_atomicDelta;\n"
2896		<< "\n"
2897		<< "void main ()\n"
2898		<< "{\n"
2899		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2900		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2901		<< "}";
2902
2903	return buf.str();
2904}
2905
2906std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2907{
2908	std::ostringstream buf;
2909
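	// Each invocation copies its pixel of the work image into the result buffer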
2910	buf	<< "#version 310 es\n"
2911		<< "\n"
2912		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2913		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2914		<< "{\n"
2915		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2916		<< "} sb_res;\n"
2917		<< "\n"
2918		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2919		<< "\n"
2920		<< "void main ()\n"
2921		<< "{\n"
2922		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize << ");\n"
2923		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2924		<< "}";
2925
2926	return buf.str();
2927}
2928
2929std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2930{
2931	std::ostringstream buf;
2932
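	// Each invocation clears its pixel of the work image to zero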
2933	buf	<< "#version 310 es\n"
2934		<< "\n"
2935		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2937		<< "\n"
2938		<< "void main ()\n"
2939		<< "{\n"
2940		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2941		<< "}";
2942
2943	return buf.str();
2944}
2945
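// Tests concurrent SSBO atomic operations and atomic counter operations backed by the same buffer object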
2946class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2947{
2948public:
2949							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2950							~ConcurrentSSBOAtomicCounterMixedCase	(void);
2951
2952	void					init									(void);
2953	void					deinit									(void);
2954	IterateResult			iterate									(void);
2955
2956private:
2957	std::string				genSSBOComputeSource					(void) const;
2958	std::string				genAtomicCounterComputeSource			(void) const;
2959
2960	const int				m_numCalls;
2961	const int				m_workSize;
2962	deUint32				m_bufferID;
2963	glu::ShaderProgram*		m_ssboAtomicProgram;
2964	glu::ShaderProgram*		m_atomicCounterProgram;
2965};
2966
2967ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2968	: TestCase					(context, name, description)
2969	, m_numCalls				(numCalls)
2970	, m_workSize				(workSize)
2971	, m_bufferID				(DE_NULL)
2972	, m_ssboAtomicProgram		(DE_NULL)
2973	, m_atomicCounterProgram	(DE_NULL)
2974{
2975	// The SSBO program XORs the 16 high bits of the counter word; the flips cancel out only if each bit ends up flipped an even number of times
2976	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
2977}
2978
2979ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
2980{
2981	deinit();
2982}
2983
2984void ConcurrentSSBOAtomicCounterMixedCase::init (void)
2985{
2986	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2987	const deUint32				zeroBuf[2]	= { 0, 0 };
2988
2989	// gen buffer
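	// The buffer holds two uints: word 0 is shared by the atomic counter (binding 2, offset 0) and sb_work.targetValue, word 1 receives sb_work.dummy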
2990
2991	gl.genBuffers(1, &m_bufferID);
2992	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2993	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
2994
2995	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2996
2997	// gen programs
2998
2999	{
3000		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3001
3002		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3003		m_testCtx.getLog() << *m_ssboAtomicProgram;
3004		if (!m_ssboAtomicProgram->isOk())
3005			throw tcu::TestError("could not build program");
3006	}
3007	{
3008		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3009
3010		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3011		m_testCtx.getLog() << *m_atomicCounterProgram;
3012		if (!m_atomicCounterProgram->isOk())
3013			throw tcu::TestError("could not build program");
3014	}
3015}
3016
3017void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3018{
3019	if (m_bufferID)
3020	{
3021		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3022		m_bufferID = 0;
3023	}
3024
3025	delete m_ssboAtomicProgram;
3026	m_ssboAtomicProgram = DE_NULL;
3027
3028	delete m_atomicCounterProgram;
3029	m_atomicCounterProgram = DE_NULL;
3030}
3031
3032TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3033{
3034	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3035
3036	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3037
3038	// invoke programs N times
3039	{
3040		m_testCtx.getLog()
3041			<< tcu::TestLog::Message
3042			<< "Running the SSBO atomic program and the atomic counter program " << m_numCalls << " times each, interleaved.\n"
3043			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
3044			<< tcu::TestLog::EndMessage;
3045
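		// Bind the same buffer to both the SSBO binding point and the atomic counter binding point so both programs modify the same memory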
3046		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3047		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
3048
3049		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3050		{
3051			gl.useProgram(m_atomicCounterProgram->getProgram());
3052			gl.dispatchCompute(m_workSize, 1, 1);
3053
3054			gl.useProgram(m_ssboAtomicProgram->getProgram());
3055			gl.dispatchCompute(m_workSize, 1, 1);
3056		}
3057
3058		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3059	}
3060
3061	// Verify result
3062	{
3063		deUint32 result;
3064
3065		// XORs cancel out, only addition is left
3066		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer; the counter value should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3067
3068		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3069		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3070
3071		if ((int)result != m_numCalls*m_workSize)
3072		{
3073			m_testCtx.getLog()
3074				<< tcu::TestLog::Message
3075				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3076				<< tcu::TestLog::EndMessage;
3077
3078			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3079			return STOP;
3080		}
3081
3082		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3083	}
3084
3085	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3086	return STOP;
3087}
3088
3089std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3090{
3091	std::ostringstream buf;
3092
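	// Each invocation flips one of the 16 high bits of the shared counter word with atomicXor and stores the previous value into the dummy field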
3093	buf	<< "#version 310 es\n"
3094		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3095		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3096		<< "{\n"
3097		<< "	highp uint targetValue;\n"
3098		<< "	highp uint dummy;\n"
3099		<< "} sb_work;\n"
3100		<< "\n"
3101		<< "void main ()\n"
3102		<< "{\n"
3103		<< "	// flip high bits\n"
3104		<< "	highp uint mask = uint(1) << (16u + (gl_GlobalInvocationID.x % 16u));\n"
3105		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3106		<< "}";
3107
3108	return buf.str();
3109}
3110
3111std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3112{
3113	std::ostringstream buf;
3114
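	// Each invocation increments the atomic counter stored in the first word of the shared buffer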
3115	buf	<< "#version 310 es\n"
3116		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3117		<< "\n"
3118		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
3119		<< "\n"
3120		<< "void main ()\n"
3121		<< "{\n"
3122		<< "	atomicCounterIncrement(u_counter);\n"
3123		<< "}";
3124
3125	return buf.str();
3126}
3127
3128} // anonymous
3129
3130SynchronizationTests::SynchronizationTests (Context& context)
3131	: TestCaseGroup(context, "synchronization", "Synchronization tests")
3132{
3133}
3134
3135SynchronizationTests::~SynchronizationTests (void)
3136{
3137}
3138
3139void SynchronizationTests::init (void)
3140{
3141	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",		"Test intra-invocation synchronization");
3142	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation",	"Test inter-invocation synchronization");
3143	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",			"Test inter-call synchronization");
3144
3145	addChild(inInvocationGroup);
3146	addChild(interInvocationGroup);
3147	addChild(interCallGroup);
3148
3149	// .in_invocation & .inter_invocation
3150	{
3151		static const struct CaseConfig
3152		{
3153			const char*									namePrefix;
3154			const InterInvocationTestCase::StorageType	storage;
3155			const int									flags;
3156		} configs[] =
3157		{
3158			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
3159			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
3160			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
3161			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
3162		};
3163
3164		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3165		{
3166			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3167			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3168
3169			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3170			{
3171				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3172
3173				targetGroup->addChild(new InvocationWriteReadCase(m_context,
3174																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3175																  (std::string("Write to ") + target + " and read it").c_str(),
3176																  configs[configNdx].storage,
3177																  configs[configNdx].flags | extraFlags));
3178
3179				targetGroup->addChild(new InvocationReadWriteCase(m_context,
3180																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3181																  (std::string("Read from ") + target + " and then write to it").c_str(),
3182																  configs[configNdx].storage,
3183																  configs[configNdx].flags | extraFlags));
3184
3185				targetGroup->addChild(new InvocationOverWriteCase(m_context,
3186																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3187																  (std::string("Write to ") + target + " twice and read it").c_str(),
3188																  configs[configNdx].storage,
3189																  configs[configNdx].flags | extraFlags));
3190
3191				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3192																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3193																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
3194																   InvocationAliasWriteCase::TYPE_WRITE,
3195																   configs[configNdx].storage,
3196																   configs[configNdx].flags | extraFlags));
3197
3198				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3199																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3200																   (std::string("Write to aliasing ") + target + "s and read the result").c_str(),
3201																   InvocationAliasWriteCase::TYPE_OVERWRITE,
3202																   configs[configNdx].storage,
3203																   configs[configNdx].flags | extraFlags));
3204			}
3205		}
3206	}
3207
3208	// .inter_call
3209	{
3210		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3211		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3212
3213		interCallGroup->addChild(withBarrierGroup);
3214		interCallGroup->addChild(withoutBarrierGroup);
3215
3216		// .with_memory_barrier
3217		{
3218			static const struct CaseConfig
3219			{
3220				const char*								namePrefix;
3221				const InterCallTestCase::StorageType	storage;
3222				const int								flags;
3223			} configs[] =
3224			{
3225				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
3226				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3227				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
3228				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3229			};
3230
3231			const int seed0 = 123;
3232			const int seed1 = 457;
3233
3234			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3235			{
3236				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3237
3238				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3239																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3240																 (std::string("Write to ") + target + " and read it").c_str(),
3241																 configs[configNdx].storage,
3242																 configs[configNdx].flags,
3243																 InterCallOperations()
3244																	<< op::WriteData::Generate(1, seed0)
3245																	<< op::Barrier()
3246																	<< op::ReadData::Generate(1, seed0)));
3247
3248				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3249																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3250																 (std::string("Read from ") + target + " and then write to it").c_str(),
3251																 configs[configNdx].storage,
3252																 configs[configNdx].flags,
3253																 InterCallOperations()
3254																	<< op::ReadZeroData::Generate(1)
3255																	<< op::Barrier()
3256																	<< op::WriteData::Generate(1, seed0)));
3257
3258				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3259																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3260																 (std::string("Write to ") + target + " twice and read it").c_str(),
3261																 configs[configNdx].storage,
3262																 configs[configNdx].flags,
3263																 InterCallOperations()
3264																	<< op::WriteData::Generate(1, seed0)
3265																	<< op::Barrier()
3266																	<< op::WriteData::Generate(1, seed1)
3267																	<< op::Barrier()
3268																	<< op::ReadData::Generate(1, seed1)));
3269
3270				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3271																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3272																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3273																 configs[configNdx].storage,
3274																 configs[configNdx].flags,
3275																 InterCallOperations()
3276																	<< op::WriteData::Generate(1, seed0)
3277																	<< op::WriteData::Generate(2, seed1)
3278																	<< op::Barrier()
3279																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3280
3281				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3283																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3284																 configs[configNdx].storage,
3285																 configs[configNdx].flags,
3286																 InterCallOperations()
3287																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
3288																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
3289																	<< op::Barrier()
3290																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3291
3292				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3293																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3294																 (std::string("Two unrelated ") + target + " write-reads, reads in the same order as the writes").c_str(),
3295																 configs[configNdx].storage,
3296																 configs[configNdx].flags,
3297																 InterCallOperations()
3298																	<< op::WriteData::Generate(1, seed0)
3299																	<< op::WriteData::Generate(2, seed1)
3300																	<< op::Barrier()
3301																	<< op::ReadData::Generate(1, seed0)
3302																	<< op::ReadData::Generate(2, seed1)));
3303
3304				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3306																 (std::string("Two unrelated ") + target + " write-reads, reads in the reverse order of the writes").c_str(),
3307																 configs[configNdx].storage,
3308																 configs[configNdx].flags,
3309																 InterCallOperations()
3310																	<< op::WriteData::Generate(1, seed0)
3311																	<< op::WriteData::Generate(2, seed1)
3312																	<< op::Barrier()
3313																	<< op::ReadData::Generate(2, seed1)
3314																	<< op::ReadData::Generate(1, seed0)));
3315			}
3316
3317			// .without_memory_barrier
3318			{
3319				struct InvocationConfig
3320				{
3321					const char*	name;
3322					int			count;
3323				};
3324
3325				static const InvocationConfig ssboInvocations[] =
3326				{
3327					{ "1k",		1024	},
3328					{ "4k",		4096	},
3329					{ "32k",	32768	},
3330				};
3331				static const InvocationConfig imageInvocations[] =
3332				{
3333					{ "8x8",		8	},
3334					{ "32x32",		32	},
3335					{ "128x128",	128	},
3336				};
3337				static const InvocationConfig counterInvocations[] =
3338				{
3339					{ "32",		32		},
3340					{ "128",	128		},
3341					{ "1k",		1024	},
3342				};
3343				static const int callCounts[] = { 2, 5, 100 };
3344
3345				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3346					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3347						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3348
3349				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3350					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3351						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3352
3353				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3354					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3355						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3356
3357				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3358					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3359						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3360			}
3361		}
3362	}
3363}
3364
3365} // Functional
3366} // gles31
3367} // deqp
3368