1/*-------------------------------------------------------------------------
2 * OpenGL Conformance Test Suite
3 * -----------------------------
4 *
5 * Copyright (c) 2014-2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 */ /*!
20 * \file
21 * \brief
22 */ /*-------------------------------------------------------------------*/
23
24#include "es31cComputeShaderTests.hpp"
25#include "gluContextInfo.hpp"
26#include "glwEnums.hpp"
27#include "glwFunctions.hpp"
28#include "tcuMatrix.hpp"
29#include "tcuMatrixUtil.hpp"
30#include "tcuRenderTarget.hpp"
31#include <cstdarg>
32#include <sstream>
33
34namespace glcts
35{
36
37using namespace glw;
38using tcu::Vec2;
39using tcu::Vec3;
40using tcu::Vec4;
41using tcu::UVec4;
42using tcu::UVec3;
43using tcu::Mat4;
44
45namespace
46{
47
48typedef Vec3  vec2;
49typedef Vec3  vec3;
50typedef Vec4  vec4;
51typedef UVec3 uvec3;
52typedef UVec4 uvec4;
53typedef Mat4  mat4;
54
55const char* const kGLSLVer = "#version 310 es\n";
56
57class ComputeShaderBase : public glcts::SubcaseBase
58{
59
60public:
61	virtual ~ComputeShaderBase()
62	{
63	}
64
65	ComputeShaderBase()
66		: renderTarget(m_context.getRenderContext().getRenderTarget()), pixelFormat(renderTarget.getPixelFormat())
67	{
68		g_color_eps = vec4(1.f / (1 << 13));
69		if (pixelFormat.redBits != 0)
70		{
71			g_color_eps.x() += 1.f / (static_cast<float>(1 << pixelFormat.redBits) - 1.0f);
72		}
73		if (pixelFormat.greenBits != 0)
74		{
75			g_color_eps.y() += 1.f / (static_cast<float>(1 << pixelFormat.greenBits) - 1.0f);
76		}
77		if (pixelFormat.blueBits != 0)
78		{
79			g_color_eps.z() += 1.f / (static_cast<float>(1 << pixelFormat.blueBits) - 1.0f);
80		}
81		if (pixelFormat.alphaBits != 0)
82		{
83			g_color_eps.w() += 1.f / (static_cast<float>(1 << pixelFormat.alphaBits) - 1.0f);
84		}
85	}
86
87	const tcu::RenderTarget& renderTarget;
88	const tcu::PixelFormat&  pixelFormat;
89	vec4					 g_color_eps;
90
91	uvec3 IndexTo3DCoord(GLuint idx, GLuint max_x, GLuint max_y)
92	{
93		const GLuint x = idx % max_x;
94		idx /= max_x;
95		const GLuint y = idx % max_y;
96		idx /= max_y;
97		const GLuint z = idx;
98		return uvec3(x, y, z);
99	}
100
101	bool CheckProgram(GLuint program, bool* compile_error = NULL)
102	{
103		GLint compile_status = GL_TRUE;
104		GLint status;
105		glGetProgramiv(program, GL_LINK_STATUS, &status);
106
107		if (status == GL_FALSE)
108		{
109			GLint attached_shaders = 0;
110			glGetProgramiv(program, GL_ATTACHED_SHADERS, &attached_shaders);
111
112			if (attached_shaders > 0)
113			{
114				std::vector<GLuint> shaders(attached_shaders);
115				glGetAttachedShaders(program, attached_shaders, NULL, &shaders[0]);
116
117				for (GLint i = 0; i < attached_shaders; ++i)
118				{
119					GLenum type;
120					glGetShaderiv(shaders[i], GL_SHADER_TYPE, reinterpret_cast<GLint*>(&type));
121					switch (type)
122					{
123					case GL_VERTEX_SHADER:
124						m_context.getTestContext().getLog()
125							<< tcu::TestLog::Message << "*** Vertex Shader ***" << tcu::TestLog::EndMessage;
126						break;
127					case GL_FRAGMENT_SHADER:
128						m_context.getTestContext().getLog()
129							<< tcu::TestLog::Message << "*** Fragment Shader ***" << tcu::TestLog::EndMessage;
130						break;
131					case GL_COMPUTE_SHADER:
132						m_context.getTestContext().getLog()
133							<< tcu::TestLog::Message << "*** Compute Shader ***" << tcu::TestLog::EndMessage;
134						break;
135					default:
136						m_context.getTestContext().getLog()
137							<< tcu::TestLog::Message << "*** Unknown Shader ***" << tcu::TestLog::EndMessage;
138						break;
139					}
140
141					GLint res;
142					glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &res);
143					if (res != GL_TRUE)
144						compile_status = res;
145
146					GLint length = 0;
147					glGetShaderiv(shaders[i], GL_SHADER_SOURCE_LENGTH, &length);
148					if (length > 0)
149					{
150						std::vector<GLchar> source(length);
151						glGetShaderSource(shaders[i], length, NULL, &source[0]);
152						m_context.getTestContext().getLog()
153							<< tcu::TestLog::Message << &source[0] << tcu::TestLog::EndMessage;
154					}
155
156					glGetShaderiv(shaders[i], GL_INFO_LOG_LENGTH, &length);
157					if (length > 0)
158					{
159						std::vector<GLchar> log(length);
160						glGetShaderInfoLog(shaders[i], length, NULL, &log[0]);
161						m_context.getTestContext().getLog()
162							<< tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
163					}
164				}
165			}
166
167			GLint length;
168			glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
169			if (length > 0)
170			{
171				std::vector<GLchar> log(length);
172				glGetProgramInfoLog(program, length, NULL, &log[0]);
173				m_context.getTestContext().getLog() << tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
174			}
175		}
176
177		if (compile_error)
178			*compile_error = (compile_status == GL_TRUE ? false : true);
179		if (compile_status != GL_TRUE)
180			return false;
181		return status == GL_TRUE ? true : false;
182	}
183
184	GLuint CreateComputeProgram(const std::string& cs)
185	{
186		const GLuint p = glCreateProgram();
187
188		if (!cs.empty())
189		{
190			const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
191			glAttachShader(p, sh);
192			glDeleteShader(sh);
193			const char* const src[2] = { kGLSLVer, cs.c_str() };
194			glShaderSource(sh, 2, src, NULL);
195			glCompileShader(sh);
196		}
197
198		return p;
199	}
200
201	GLuint CreateProgram(const std::string& vs, const std::string& fs)
202	{
203		const GLuint p = glCreateProgram();
204
205		if (!vs.empty())
206		{
207			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
208			glAttachShader(p, sh);
209			glDeleteShader(sh);
210			const char* const src[2] = { kGLSLVer, vs.c_str() };
211			glShaderSource(sh, 2, src, NULL);
212			glCompileShader(sh);
213		}
214		if (!fs.empty())
215		{
216			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
217			glAttachShader(p, sh);
218			glDeleteShader(sh);
219			const char* const src[2] = { kGLSLVer, fs.c_str() };
220			glShaderSource(sh, 2, src, NULL);
221			glCompileShader(sh);
222		}
223
224		return p;
225	}
226
227	GLuint BuildShaderProgram(GLenum type, const std::string& source)
228	{
229		if (type == GL_COMPUTE_SHADER)
230		{
231			const char* const src[2] = { kGLSLVer, source.c_str() };
232			return glCreateShaderProgramv(type, 2, src);
233		}
234
235		const char* const src[2] = { kGLSLVer, source.c_str() };
236		return glCreateShaderProgramv(type, 2, src);
237	}
238
239	GLfloat distance(GLfloat p0, GLfloat p1)
240	{
241		return de::abs(p0 - p1);
242	}
243
244	inline bool ColorEqual(const vec4& c0, const vec4& c1, const vec4& epsilon)
245	{
246		if (distance(c0.x(), c1.x()) > epsilon.x())
247			return false;
248		if (distance(c0.y(), c1.y()) > epsilon.y())
249			return false;
250		if (distance(c0.z(), c1.z()) > epsilon.z())
251			return false;
252		if (distance(c0.w(), c1.w()) > epsilon.w())
253			return false;
254		return true;
255	}
256
257	inline bool ColorEqual(const vec3& c0, const vec3& c1, const vec4& epsilon)
258	{
259		if (distance(c0.x(), c1.x()) > epsilon.x())
260			return false;
261		if (distance(c0.y(), c1.y()) > epsilon.y())
262			return false;
263		if (distance(c0.z(), c1.z()) > epsilon.z())
264			return false;
265		return true;
266	}
267
268	bool ValidateReadBuffer(int x, int y, int w, int h, const vec4& expected)
269	{
270		std::vector<vec4>	display(w * h);
271		std::vector<GLubyte> data(w * h * 4);
272		glReadPixels(x, y, w, h, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
273
274		for (int i = 0; i < w * h * 4; i += 4)
275		{
276			display[i / 4] = vec4(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
277								  static_cast<GLfloat>(data[i + 2] / 255.), static_cast<GLfloat>(data[i + 3] / 255.));
278		}
279
280		for (int j = 0; j < h; ++j)
281		{
282			for (int i = 0; i < w; ++i)
283			{
284				if (!ColorEqual(display[j * w + i], expected, g_color_eps))
285				{
286					m_context.getTestContext().getLog()
287						<< tcu::TestLog::Message << "Color at (" << x + i << ", " << y + j << ") is ["
288						<< display[j * w + i].x() << ", " << display[j * w + i].y() << ", " << display[j * w + i].z()
289						<< ", " << display[j * w + i].w() << "] should be [" << expected.x() << ", " << expected.y()
290						<< ", " << expected.z() << ", " << expected.w() << "]." << tcu::TestLog::EndMessage;
291					return false;
292				}
293			}
294		}
295
296		return true;
297	}
298
299	bool ValidateReadBufferCenteredQuad(int width, int height, const vec3& expected)
300	{
301		bool				 result = true;
302		std::vector<vec3>	fb(width * height);
303		std::vector<GLubyte> data(width * height * 4);
304		glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
305
306		for (int i = 0; i < width * height * 4; i += 4)
307		{
308			fb[i / 4] = vec3(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
309							 static_cast<GLfloat>(data[i + 2] / 255.));
310		}
311
312		int startx = int((static_cast<float>(width) * 0.1f) + 1);
313		int starty = int((static_cast<float>(height) * 0.1f) + 1);
314		int endx   = int(static_cast<float>(width) - 2 * ((static_cast<float>(width) * 0.1f) + 1) - 1);
315		int endy   = int(static_cast<float>(height) - 2 * ((static_cast<float>(height) * 0.1f) + 1) - 1);
316
317		for (int y = starty; y < endy; ++y)
318		{
319			for (int x = startx; x < endx; ++x)
320			{
321				const int idx = y * width + x;
322				if (!ColorEqual(fb[idx], expected, g_color_eps))
323				{
324					return false;
325				}
326			}
327		}
328
329		if (!ColorEqual(fb[2 * width + 2], vec3(0), g_color_eps))
330		{
331			result = false;
332		}
333		if (!ColorEqual(fb[2 * width + (width - 3)], vec3(0), g_color_eps))
334		{
335			result = false;
336		}
337		if (!ColorEqual(fb[(height - 3) * width + (width - 3)], vec3(0), g_color_eps))
338		{
339			result = false;
340		}
341		if (!ColorEqual(fb[(height - 3) * width + 2], vec3(0), g_color_eps))
342		{
343			result = false;
344		}
345
346		return result;
347	}
348
349	int getWindowWidth()
350	{
351		return renderTarget.getWidth();
352	}
353
354	int getWindowHeight()
355	{
356		return renderTarget.getHeight();
357	}
358
359	bool ValidateWindow4Quads(const vec3& lb, const vec3& rb, const vec3& rt, const vec3& lt)
360	{
361		int					 width  = 100;
362		int					 height = 100;
363		std::vector<vec3>	fb(width * height);
364		std::vector<GLubyte> data(width * height * 4);
365		glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
366
367		for (int i = 0; i < width * height * 4; i += 4)
368		{
369			fb[i / 4] = vec3(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
370							 static_cast<GLfloat>(data[i + 2] / 255.));
371		}
372
373		bool status = true;
374
375		// left-bottom quad
376		for (int y = 10; y < height / 2 - 10; ++y)
377		{
378			for (int x = 10; x < width / 2 - 10; ++x)
379			{
380				const int idx = y * width + x;
381				if (!ColorEqual(fb[idx], lb, g_color_eps))
382				{
383					m_context.getTestContext().getLog()
384						<< tcu::TestLog::Message << "First bad color (" << x << ", " << y << "): " << fb[idx].x() << " "
385						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
386					status = false;
387				}
388			}
389		}
390		// right-bottom quad
391		for (int y = 10; y < height / 2 - 10; ++y)
392		{
393			for (int x = width / 2 + 10; x < width - 10; ++x)
394			{
395				const int idx = y * width + x;
396				if (!ColorEqual(fb[idx], rb, g_color_eps))
397				{
398					m_context.getTestContext().getLog()
399						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
400						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
401					status = false;
402				}
403			}
404		}
405		// right-top quad
406		for (int y = height / 2 + 10; y < height - 10; ++y)
407		{
408			for (int x = width / 2 + 10; x < width - 10; ++x)
409			{
410				const int idx = y * width + x;
411				if (!ColorEqual(fb[idx], rt, g_color_eps))
412				{
413					m_context.getTestContext().getLog()
414						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
415						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
416					status = false;
417				}
418			}
419		}
420		// left-top quad
421		for (int y = height / 2 + 10; y < height - 10; ++y)
422		{
423			for (int x = 10; x < width / 2 - 10; ++x)
424			{
425				const int idx = y * width + x;
426				if (!ColorEqual(fb[idx], lt, g_color_eps))
427				{
428					m_context.getTestContext().getLog()
429						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
430						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
431					status = false;
432				}
433			}
434		}
435		// middle horizontal line should be black
436		for (int y = height / 2 - 2; y < height / 2 + 2; ++y)
437		{
438			for (int x = 0; x < width; ++x)
439			{
440				const int idx = y * width + x;
441				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
442				{
443					m_context.getTestContext().getLog()
444						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
445						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
446					status = false;
447				}
448			}
449		}
450		// middle vertical line should be black
451		for (int y = 0; y < height; ++y)
452		{
453			for (int x = width / 2 - 2; x < width / 2 + 2; ++x)
454			{
455				const int idx = y * width + x;
456				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
457				{
458					m_context.getTestContext().getLog()
459						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
460						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
461					status = false;
462				}
463			}
464		}
465
466		return status;
467	}
468
469	bool IsEqual(vec4 a, vec4 b)
470	{
471		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
472	}
473
474	bool IsEqual(uvec4 a, uvec4 b)
475	{
476		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
477	}
478};
479
480class SimpleCompute : public ComputeShaderBase
481{
482
483	virtual std::string Title()
484	{
485		return "Simplest possible Compute Shader";
486	}
487
488	virtual std::string Purpose()
489	{
490		return "1. Verify that CS can be created, compiled and linked.\n"
491			   "2. Verify that local work size can be queried with GetProgramiv command.\n"
492			   "3. Verify that CS can be dispatched with DispatchCompute command.\n"
493			   "4. Verify that CS can write to SSBO.";
494	}
495
496	virtual std::string Method()
497	{
498		return "Create and dispatch CS. Verify SSBO content.";
499	}
500
501	virtual std::string PassCriteria()
502	{
503		return "Everything works as expected.";
504	}
505
506	GLuint m_program;
507	GLuint m_buffer;
508
509	virtual long Setup()
510	{
511
512		const char* const glsl_cs =
513			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
514			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
515		m_program = CreateComputeProgram(glsl_cs);
516		glLinkProgram(m_program);
517		if (!CheckProgram(m_program))
518			return ERROR;
519
520		GLint v[3];
521		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
522		if (v[0] != 1 || v[1] != 1 || v[2] != 1)
523		{
524			m_context.getTestContext().getLog()
525				<< tcu::TestLog::Message << "Got " << v[0] << ", " << v[1] << ", " << v[2]
526				<< ", expected: 1, 1, 1 in GL_COMPUTE_WORK_GROUP_SIZE check" << tcu::TestLog::EndMessage;
527			return ERROR;
528		}
529
530		glGenBuffers(1, &m_buffer);
531		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
532		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), NULL, GL_DYNAMIC_DRAW);
533		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
534
535		return NO_ERROR;
536	}
537
538	virtual long Run()
539	{
540		glUseProgram(m_program);
541		glDispatchCompute(1, 1, 1);
542
543		vec4* data;
544		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer);
545		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
546		data	   = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
547		long error = NO_ERROR;
548		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
549		{
550			error = ERROR;
551		}
552		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
553		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
554		return error;
555	}
556
557	virtual long Cleanup()
558	{
559		glUseProgram(0);
560		glDeleteProgram(m_program);
561		glDeleteBuffers(1, &m_buffer);
562		return NO_ERROR;
563	}
564};
565
566class BasicOneWorkGroup : public ComputeShaderBase
567{
568
569	virtual std::string Title()
570	{
571		return "One work group with various local sizes";
572	}
573
574	virtual std::string Purpose()
575	{
576		return NL "1. Verify that declared local work size has correct effect." NL
577				  "2. Verify that the number of shader invocations is correct." NL
578				  "3. Verify that the built-in variables: gl_WorkGroupSize, gl_WorkGroupID, gl_GlobalInvocationID," NL
579				  "    gl_LocalInvocationID and gl_LocalInvocationIndex has correct values." NL
580				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands work as expected.";
581	}
582
583	virtual std::string Method()
584	{
585		return NL "1. Create several CS with various local sizes." NL
586				  "2. Dispatch each CS with DispatchCompute and DispatchComputeIndirect commands." NL
587				  "3. Verify SSBO content.";
588	}
589
590	virtual std::string PassCriteria()
591	{
592		return "Everything works as expected.";
593	}
594
595	GLuint m_program;
596	GLuint m_storage_buffer;
597	GLuint m_dispatch_buffer;
598
599	std::string GenSource(int x, int y, int z, GLuint binding)
600	{
601		std::stringstream ss;
602		ss << NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
603		   << ") in;" NL "layout(std430, binding = " << binding
604		   << ") buffer Output {" NL "  uvec4 local_id[];" NL "} g_out;" NL "void main() {" NL
605			  "  if (gl_WorkGroupSize == uvec3("
606		   << x << ", " << y << ", " << z
607		   << ") && gl_WorkGroupID == uvec3(0) &&" NL "      gl_GlobalInvocationID == gl_LocalInvocationID) {" NL
608			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(gl_LocalInvocationID, 0);" NL "  } else {" NL
609			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(0xffff);" NL "  }" NL "}";
610		return ss.str();
611	}
612
613	bool RunIteration(int local_size_x, int local_size_y, int local_size_z, GLuint binding, bool dispatch_indirect)
614	{
615		if (m_program != 0)
616			glDeleteProgram(m_program);
617		m_program = CreateComputeProgram(GenSource(local_size_x, local_size_y, local_size_z, binding));
618		glLinkProgram(m_program);
619		if (!CheckProgram(m_program))
620			return false;
621
622		GLint v[3];
623		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
624		if (v[0] != local_size_x || v[1] != local_size_y || v[2] != local_size_z)
625		{
626			m_context.getTestContext().getLog()
627				<< tcu::TestLog::Message << "GL_COMPUTE_LOCAL_WORK_SIZE is (" << v[0] << " " << v[1] << " " << v[2]
628				<< ") should be (" << local_size_x << " " << local_size_y << " " << local_size_z << ")"
629				<< tcu::TestLog::EndMessage;
630			return false;
631		}
632
633		const int kSize = local_size_x * local_size_y * local_size_z;
634
635		if (m_storage_buffer == 0)
636			glGenBuffers(1, &m_storage_buffer);
637		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, m_storage_buffer);
638		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kSize, NULL, GL_DYNAMIC_DRAW);
639		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
640
641		glUseProgram(m_program);
642		if (dispatch_indirect)
643		{
644			const GLuint num_groups[3] = { 1, 1, 1 };
645			if (m_dispatch_buffer == 0)
646				glGenBuffers(1, &m_dispatch_buffer);
647			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
648			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_DRAW);
649			glDispatchComputeIndirect(0);
650		}
651		else
652		{
653			glDispatchCompute(1, 1, 1);
654		}
655
656		uvec4* data;
657		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
658		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
659		data =
660			static_cast<uvec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kSize * sizeof(uvec4), GL_MAP_READ_BIT));
661
662		bool ret = true;
663		for (int z = 0; z < local_size_z; ++z)
664		{
665			for (int y = 0; y < local_size_y; ++y)
666			{
667				for (int x = 0; x < local_size_x; ++x)
668				{
669					const int index = z * local_size_x * local_size_y + y * local_size_x + x;
670					if (!IsEqual(data[index], uvec4(x, y, z, 0)))
671					{
672						m_context.getTestContext().getLog()
673							<< tcu::TestLog::Message << "Invalid data at offset " << index << tcu::TestLog::EndMessage;
674						ret = false;
675					}
676				}
677			}
678		}
679		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
680		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
681		return ret;
682	}
683
684	virtual long Setup()
685	{
686		m_program		  = 0;
687		m_storage_buffer  = 0;
688		m_dispatch_buffer = 0;
689		return NO_ERROR;
690	}
691
692	virtual long Run()
693	{
694		if (!RunIteration(16, 1, 1, 0, true))
695			return ERROR;
696		if (!RunIteration(8, 8, 1, 1, false))
697			return ERROR;
698		if (!RunIteration(4, 4, 4, 2, true))
699			return ERROR;
700		if (!RunIteration(1, 2, 3, 3, false))
701			return ERROR;
702		if (!RunIteration(128, 1, 1, 3, true))
703			return ERROR;
704		if (!RunIteration(2, 8, 8, 3, false))
705			return ERROR;
706		if (!RunIteration(2, 2, 32, 7, true))
707			return ERROR;
708		return NO_ERROR;
709	}
710
711	virtual long Cleanup()
712	{
713		glUseProgram(0);
714		glDeleteProgram(m_program);
715		glDeleteBuffers(1, &m_storage_buffer);
716		glDeleteBuffers(1, &m_dispatch_buffer);
717		return NO_ERROR;
718	}
719};
720
721class BasicResourceUBO : public ComputeShaderBase
722{
723
724	virtual std::string Title()
725	{
726		return "Compute Shader resources - UBOs";
727	}
728
729	virtual std::string Purpose()
730	{
731		return "Verify that CS is able to read data from UBOs and write it to SSBO.";
732	}
733
734	virtual std::string Method()
735	{
736		return NL "1. Create CS which uses array of UBOs." NL
737				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
738				  "3. Read data from each UBO and write it to SSBO." NL "4. Verify SSBO content." NL
739				  "5. Repeat for different buffer and CS work sizes.";
740	}
741
742	virtual std::string PassCriteria()
743	{
744		return "Everything works as expected.";
745	}
746
747	GLuint m_program;
748	GLuint m_storage_buffer;
749	GLuint m_uniform_buffer[12];
750	GLuint m_dispatch_buffer;
751
752	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
753	{
754		const uvec3		  global_size = local_size * num_groups;
755		std::stringstream ss;
756		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
757		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
758		   << ", " << global_size.y() << ", " << global_size.z()
759		   << ");" NL "layout(std140) uniform InputBuffer {" NL "  vec4 data["
760		   << global_size.x() * global_size.y() * global_size.z()
761		   << "];" NL "} g_in_buffer[12];" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
762		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
763		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
764		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
765		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
766		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
767		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
768		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
769		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
770		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data9["
771		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data10["
772		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data11["
773		   << global_size.x() * global_size.y() * global_size.z()
774		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
775			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
776			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
777			  "  g_out_buffer.data0[global_index] = g_in_buffer[0].data[global_index];" NL
778			  "  g_out_buffer.data1[global_index] = g_in_buffer[1].data[global_index];" NL
779			  "  g_out_buffer.data2[global_index] = g_in_buffer[2].data[global_index];" NL
780			  "  g_out_buffer.data3[global_index] = g_in_buffer[3].data[global_index];" NL
781			  "  g_out_buffer.data4[global_index] = g_in_buffer[4].data[global_index];" NL
782			  "  g_out_buffer.data5[global_index] = g_in_buffer[5].data[global_index];" NL
783			  "  g_out_buffer.data6[global_index] = g_in_buffer[6].data[global_index];" NL
784			  "  g_out_buffer.data7[global_index] = g_in_buffer[7].data[global_index];" NL
785			  "  g_out_buffer.data8[global_index] = g_in_buffer[8].data[global_index];" NL
786			  "  g_out_buffer.data9[global_index] = g_in_buffer[9].data[global_index];" NL
787			  "  g_out_buffer.data10[global_index] = g_in_buffer[10].data[global_index];" NL
788			  "  g_out_buffer.data11[global_index] = g_in_buffer[11].data[global_index];" NL "}";
789		return ss.str();
790	}
791
792	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
793	{
794		if (m_program != 0)
795			glDeleteProgram(m_program);
796		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
797		glLinkProgram(m_program);
798		if (!CheckProgram(m_program))
799			return false;
800
801		for (GLuint i = 0; i < 12; ++i)
802		{
803			char name[32];
804			sprintf(name, "InputBuffer[%u]", i);
805			const GLuint index = glGetUniformBlockIndex(m_program, name);
806			glUniformBlockBinding(m_program, index, i);
807		}
808
809		const GLuint kBufferSize =
810			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
811
812		if (m_storage_buffer == 0)
813			glGenBuffers(1, &m_storage_buffer);
814		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
815		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 12, NULL, GL_DYNAMIC_DRAW);
816		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
817
818		if (m_uniform_buffer[0] == 0)
819			glGenBuffers(12, m_uniform_buffer);
820		for (GLuint i = 0; i < 12; ++i)
821		{
822			std::vector<vec4> data(kBufferSize);
823			for (GLuint j = 0; j < kBufferSize; ++j)
824			{
825				data[j] = vec4(static_cast<float>(i) * static_cast<float>(kBufferSize) + static_cast<float>(j));
826			}
827			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
828			glBufferData(GL_UNIFORM_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
829		}
830		glBindBuffer(GL_UNIFORM_BUFFER, 0);
831
832		glUseProgram(m_program);
833		if (dispatch_indirect)
834		{
835			if (m_dispatch_buffer == 0)
836				glGenBuffers(1, &m_dispatch_buffer);
837			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
838			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
839			glDispatchComputeIndirect(0);
840		}
841		else
842		{
843			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
844		}
845
846		vec4* data;
847		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
848		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
849		data = static_cast<vec4*>(
850			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 12 * kBufferSize, GL_MAP_READ_BIT));
851
852		bool ret = true;
853		for (GLuint z = 0; z < local_size.z() * num_groups.z(); ++z)
854		{
855			for (GLuint y = 0; y < local_size.y() * num_groups.y(); ++y)
856			{
857				for (GLuint x = 0; x < local_size.x() * num_groups.x(); ++x)
858				{
859					const GLuint index = z * local_size.x() * num_groups.x() * local_size.y() * num_groups.y() +
860										 y * local_size.x() * num_groups.x() + x;
861					for (int i = 0; i < 1; ++i)
862					{
863						if (!IsEqual(data[index * 12 + i], vec4(static_cast<float>(index * 12 + i))))
864						{
865							m_context.getTestContext().getLog() << tcu::TestLog::Message << "Invalid data at offset "
866																<< index * 12 + i << tcu::TestLog::EndMessage;
867							ret = false;
868						}
869					}
870				}
871			}
872		}
873		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
874		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
875		return ret;
876	}
877
878	virtual long Setup()
879	{
880		m_program		 = 0;
881		m_storage_buffer = 0;
882		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
883		m_dispatch_buffer = 0;
884		return NO_ERROR;
885	}
886
887	virtual long Run()
888	{
889		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
890			return ERROR;
891		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), true))
892			return ERROR;
893		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
894			return ERROR;
895		return NO_ERROR;
896	}
897
898	virtual long Cleanup()
899	{
900		glUseProgram(0);
901		glDeleteProgram(m_program);
902		glDeleteBuffers(1, &m_storage_buffer);
903		glDeleteBuffers(12, m_uniform_buffer);
904		glDeleteBuffers(1, &m_dispatch_buffer);
905		return NO_ERROR;
906	}
907};
908
909class BasicResourceTexture : public ComputeShaderBase
910{
911
912	virtual std::string Title()
913	{
914		return NL "Compute Shader resources - Textures";
915	}
916
917	virtual std::string Purpose()
918	{
919		return NL "Verify that texture access works correctly in CS.";
920	}
921
922	virtual std::string Method()
923	{
924		return NL "1. Create CS which uses all sampler types (sampler2D, sampler3D," NL "    sampler2DArray)." NL
925				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
926				  "3. Sample each texture and write sampled value to SSBO." NL "4. Verify SSBO content." NL
927				  "5. Repeat for different texture and CS work sizes.";
928	}
929
930	virtual std::string PassCriteria()
931	{
932		return NL "Everything works as expected.";
933	}
934
935	GLuint m_program;
936	GLuint m_storage_buffer;
937	GLuint m_texture[3];
938	GLuint m_dispatch_buffer;
939
940	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
941	{
942		const uvec3		  global_size = local_size * num_groups;
943		std::stringstream ss;
944		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
945		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
946		   << ", " << global_size.y() << ", " << global_size.z()
947		   << ");" NL "uniform sampler2D g_sampler0;" NL "uniform lowp sampler3D g_sampler1;" NL
948			  "uniform mediump sampler2DArray g_sampler2;" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
949		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
950		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
951		   << global_size.x() * global_size.y() * global_size.z()
952		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
953			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
954			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
955			  "  g_out_buffer.data0[global_index] = texture(g_sampler0, vec2(gl_GlobalInvocationID) / "
956			  "vec2(kGlobalSize));" NL "  g_out_buffer.data1[global_index] = textureProj(g_sampler1, "
957			  "vec4(vec3(gl_GlobalInvocationID) / vec3(kGlobalSize), 1.0));" NL
958			  "  g_out_buffer.data2[global_index] = texelFetchOffset(g_sampler2, ivec3(gl_GlobalInvocationID), 0, "
959			  "ivec2(0));" NL "}";
960		return ss.str();
961	}
962
963	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
964	{
965		if (m_program != 0)
966			glDeleteProgram(m_program);
967		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
968		glLinkProgram(m_program);
969		if (!CheckProgram(m_program))
970			return false;
971
972		glUseProgram(m_program);
973		for (int i = 0; i < 4; ++i)
974		{
975			char name[32];
976			sprintf(name, "g_sampler%d", i);
977			glUniform1i(glGetUniformLocation(m_program, name), i);
978		}
979		glUseProgram(0);
980
981		const GLuint kBufferSize =
982			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
983		const GLint kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
984		const GLint kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
985		const GLint kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
986
987		std::vector<vec4> buffer_data(kBufferSize * 4);
988		if (m_storage_buffer == 0)
989			glGenBuffers(1, &m_storage_buffer);
990		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
991		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 4, &buffer_data[0], GL_DYNAMIC_DRAW);
992		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
993
994		std::vector<vec4> texture_data(kBufferSize, vec4(123.0f));
995		if (m_texture[0] == 0)
996			glGenTextures(3, m_texture);
997
998		glActiveTexture(GL_TEXTURE0);
999		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
1000		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1001		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1002		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1003
1004		glActiveTexture(GL_TEXTURE1);
1005		glBindTexture(GL_TEXTURE_3D, m_texture[1]);
1006		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1007		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1008		glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1009
1010		glActiveTexture(GL_TEXTURE2);
1011		glBindTexture(GL_TEXTURE_2D_ARRAY, m_texture[2]);
1012		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1013		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1014		glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT,
1015					 &texture_data[0]);
1016
1017		glUseProgram(m_program);
1018		if (dispatch_indirect)
1019		{
1020			if (m_dispatch_buffer == 0)
1021				glGenBuffers(1, &m_dispatch_buffer);
1022			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1023			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1024			glDispatchComputeIndirect(0);
1025		}
1026		else
1027		{
1028			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1029		}
1030
1031		vec4* data;
1032		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1033		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1034
1035		data = static_cast<vec4*>(
1036			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 3 * kBufferSize, GL_MAP_READ_BIT));
1037		bool ret = true;
1038		for (GLuint index = 0; index < kBufferSize * 3; ++index)
1039		{
1040			if (!IsEqual(data[index], vec4(123.0f)))
1041			{
1042				m_context.getTestContext().getLog()
1043					<< tcu::TestLog::Message << "Invalid data at index " << index << tcu::TestLog::EndMessage;
1044				ret = false;
1045			}
1046		}
1047		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1048		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1049
1050		return ret;
1051	}
1052
1053	virtual long Setup()
1054	{
1055		m_program		 = 0;
1056		m_storage_buffer = 0;
1057		memset(m_texture, 0, sizeof(m_texture));
1058		m_dispatch_buffer = 0;
1059		return NO_ERROR;
1060	}
1061
1062	virtual long Run()
1063	{
1064		if (!RunIteration(uvec3(4, 4, 4), uvec3(8, 1, 1), false))
1065			return ERROR;
1066		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), true))
1067			return ERROR;
1068		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), false))
1069			return ERROR;
1070		return NO_ERROR;
1071	}
1072
1073	virtual long Cleanup()
1074	{
1075		glActiveTexture(GL_TEXTURE0);
1076		glUseProgram(0);
1077		glDeleteProgram(m_program);
1078		glDeleteBuffers(1, &m_storage_buffer);
1079		glDeleteTextures(3, m_texture);
1080		glDeleteBuffers(1, &m_dispatch_buffer);
1081		return NO_ERROR;
1082	}
1083};
1084
1085class BasicResourceImage : public ComputeShaderBase
1086{
1087
1088	virtual std::string Title()
1089	{
1090		return NL "Compute Shader resources - Images";
1091	}
1092
1093	virtual std::string Purpose()
1094	{
1095		return NL "Verify that reading/writing GPU memory via image variables work as expected.";
1096	}
1097
1098	virtual std::string Method()
1099	{
1100		return NL "1. Create CS which uses two image2D variables to read and write underlying GPU memory." NL
1101				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1102				  "3. Verify memory content." NL "4. Repeat for different texture and CS work sizes.";
1103	}
1104
1105	virtual std::string PassCriteria()
1106	{
1107		return NL "Everything works as expected.";
1108	}
1109
1110	GLuint m_program;
1111	GLuint m_draw_program;
1112	GLuint m_texture[2];
1113	GLuint m_dispatch_buffer;
1114	GLuint m_vertex_array;
1115
1116	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1117	{
1118		const uvec3		  global_size = local_size * num_groups;
1119		std::stringstream ss;
1120		if (m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1121		{
1122			ss << NL "#extension GL_OES_shader_image_atomic : enable";
1123		}
1124		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1125		   << ", local_size_z = " << local_size.z()
1126		   << ") in;" NL "layout(r32ui, binding=0) coherent uniform mediump uimage2D g_image1;" NL
1127			  "layout(r32ui, binding=1) uniform mediump uimage2D g_image2;" NL "const uvec3 kGlobalSize = uvec3("
1128		   << global_size.x() << ", " << global_size.y() << ", " << global_size.z()
1129		   << ");" NL "void main() {" NL
1130			  "  if (gl_GlobalInvocationID.x >= kGlobalSize.x || gl_GlobalInvocationID.y >= kGlobalSize.y) return;" NL
1131			  "  uvec4 color = uvec4(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y);";
1132		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1133		{
1134			m_context.getTestContext().getLog()
1135				<< tcu::TestLog::Message << "Function imageAtomicAdd not supported, using imageStore"
1136				<< tcu::TestLog::EndMessage;
1137			ss << NL "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), color);" NL
1138					 "  uvec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
1139					 "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
1140		}
1141		else
1142		{
1143			m_context.getTestContext().getLog()
1144				<< tcu::TestLog::Message << "Using imageAtomicAdd" << tcu::TestLog::EndMessage;
1145			ss << NL "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), uvec4(0));" NL
1146					 "  imageAtomicAdd(g_image1, ivec2(gl_GlobalInvocationID), color.x);" NL
1147					 "  uvec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
1148					 "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
1149		}
1150
1151		return ss.str();
1152	}
1153
1154	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1155	{
1156		if (m_program != 0)
1157			glDeleteProgram(m_program);
1158		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1159		glLinkProgram(m_program);
1160		if (!CheckProgram(m_program))
1161			return false;
1162
1163		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1164		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1165		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1166		const GLuint kSize   = kWidth * kHeight * kDepth;
1167
1168		std::vector<uvec4> data(kSize);
1169		glGenTextures(2, m_texture);
1170
1171		for (int i = 0; i < 2; ++i)
1172		{
1173			glBindTexture(GL_TEXTURE_2D, m_texture[i]);
1174			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1175			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1176			glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
1177			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, &data[0]);
1178		}
1179		glBindTexture(GL_TEXTURE_2D, 0);
1180
1181		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1182		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1183		glUseProgram(m_program);
1184		if (dispatch_indirect)
1185		{
1186			if (m_dispatch_buffer == 0)
1187				glGenBuffers(1, &m_dispatch_buffer);
1188			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1189			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1190			glDispatchComputeIndirect(0);
1191		}
1192		else
1193		{
1194			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1195		}
1196		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1197
1198		glClear(GL_COLOR_BUFFER_BIT);
1199		glActiveTexture(GL_TEXTURE0);
1200		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
1201		glActiveTexture(GL_TEXTURE1);
1202		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
1203		glUseProgram(m_draw_program);
1204		glBindVertexArray(m_vertex_array);
1205		glViewport(0, 0, kWidth, kHeight);
1206		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
1207
1208		std::vector<vec4>	display(kWidth * kHeight);
1209		std::vector<GLubyte> colorData(kWidth * kHeight * 4);
1210		glReadPixels(0, 0, kWidth, kHeight, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
1211		glDeleteTextures(2, m_texture);
1212
1213		for (int i = 0; i < kWidth * kHeight * 4; i += 4)
1214		{
1215			display[i / 4] =
1216				vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
1217					 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
1218		}
1219
1220		/* As the colors are converted R8->Rx and then read back as Rx->R8,
1221		 need to add both conversions to the epsilon. */
1222		vec4 kColorEps = g_color_eps;
1223		kColorEps.x() += 1.f / ((1 << 8) - 1.0f);
1224		for (int y = 0; y < kHeight; ++y)
1225		{
1226			for (int x = 0; x < kWidth; ++x)
1227			{
1228				if (y >= getWindowHeight() || x >= getWindowWidth())
1229				{
1230					continue;
1231				}
1232				const vec4 c = vec4(float(y + x) / 255.0f, 1.0f, 1.0f, 1.0f);
1233				if (!ColorEqual(display[y * kWidth + x], c, kColorEps))
1234				{
1235					m_context.getTestContext().getLog()
1236						<< tcu::TestLog::Message << "Got red: " << display[y * kWidth + x].x() << ", expected " << c.x()
1237						<< ", at (" << x << ", " << y << ")" << tcu::TestLog::EndMessage;
1238					return false;
1239				}
1240			}
1241		}
1242
1243		return true;
1244	}
1245
1246	virtual long Setup()
1247	{
1248		m_program = 0;
1249		memset(m_texture, 0, sizeof(m_texture));
1250		m_dispatch_buffer = 0;
1251		return NO_ERROR;
1252	}
1253
1254	virtual long Run()
1255	{
1256
1257		const char* const glsl_vs =
1258			NL "const vec2 g_quad[] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL "void main() {" NL
1259			   "  gl_Position = vec4(g_quad[gl_VertexID], 0, 1);" NL "}";
1260
1261		const char* glsl_fs =
1262			NL "layout(location = 0) out mediump vec4 o_color;" NL "uniform mediump usampler2D g_image1;" NL
1263			   "uniform mediump usampler2D g_image2;" NL "void main() {" NL
1264			   "  mediump uvec4 c1 = texelFetch(g_image1, ivec2(gl_FragCoord.xy), 0);" NL
1265			   "  mediump uvec4 c2 = texelFetch(g_image2, ivec2(gl_FragCoord.xy), 0);" NL
1266			   "  if (c1 == c2) o_color = vec4(float(c1.x)/255.0, 1.0, 1.0, 1.0);" NL
1267			   "  else o_color = vec4(1, 0, 0, 1);" NL "}";
1268
1269		m_draw_program = CreateProgram(glsl_vs, glsl_fs);
1270		glLinkProgram(m_draw_program);
1271		if (!CheckProgram(m_draw_program))
1272			return ERROR;
1273
1274		glUseProgram(m_draw_program);
1275		glUniform1i(glGetUniformLocation(m_draw_program, "g_image1"), 0);
1276		glUniform1i(glGetUniformLocation(m_draw_program, "g_image2"), 1);
1277		glUseProgram(0);
1278
1279		glGenVertexArrays(1, &m_vertex_array);
1280
1281		if (!RunIteration(uvec3(8, 16, 1), uvec3(8, 4, 1), true))
1282			return ERROR;
1283		if (!RunIteration(uvec3(4, 32, 1), uvec3(16, 2, 1), false))
1284			return ERROR;
1285		if (!RunIteration(uvec3(16, 4, 1), uvec3(4, 16, 1), false))
1286			return ERROR;
1287		if (!RunIteration(uvec3(8, 8, 1), uvec3(8, 8, 1), true))
1288			return ERROR;
1289
1290		return NO_ERROR;
1291	}
1292
1293	virtual long Cleanup()
1294	{
1295		glUseProgram(0);
1296		glDeleteProgram(m_program);
1297		glDeleteProgram(m_draw_program);
1298		glDeleteVertexArrays(1, &m_vertex_array);
1299		glDeleteTextures(2, m_texture);
1300		glDeleteBuffers(1, &m_dispatch_buffer);
1301		glViewport(0, 0, getWindowWidth(), getWindowHeight());
1302		return NO_ERROR;
1303	}
1304};
1305
1306class BasicResourceAtomicCounter : public ComputeShaderBase
1307{
1308
1309	virtual std::string Title()
1310	{
1311		return "Compute Shader resources - Atomic Counters";
1312	}
1313
1314	virtual std::string Purpose()
1315	{
1316		return NL
1317			"1. Verify that Atomic Counters work as expected in CS." NL
1318			"2. Verify that built-in functions: atomicCounterIncrement and atomicCounterDecrement work correctly.";
1319	}
1320
1321	virtual std::string Method()
1322	{
1323		return NL
1324			"1. Create CS which uses two atomic_uint variables." NL
1325			"2. In CS write values returned by atomicCounterIncrement and atomicCounterDecrement functions to SSBO." NL
1326			"3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL "4. Verify SSBO content." NL
1327			"5. Repeat for different buffer and CS work sizes.";
1328	}
1329
1330	virtual std::string PassCriteria()
1331	{
1332		return "Everything works as expected.";
1333	}
1334
1335	GLuint m_program;
1336	GLuint m_storage_buffer;
1337	GLuint m_counter_buffer;
1338	GLuint m_dispatch_buffer;
1339
1340	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1341	{
1342		const uvec3		  global_size = local_size * num_groups;
1343		std::stringstream ss;
1344		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1345		   << ", local_size_z = " << local_size.z()
1346		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint inc_data["
1347		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uint dec_data["
1348		   << global_size.x() * global_size.y() * global_size.z()
1349		   << "];" NL "};" NL "layout(binding = 0, offset = 0) uniform atomic_uint g_inc_counter;" NL
1350			  "layout(binding = 0, offset = 4) uniform atomic_uint g_dec_counter;" NL "void main() {" NL
1351			  "  uint index = atomicCounterIncrement(g_inc_counter);" NL "  inc_data[index] = index;" NL
1352			  "  dec_data[index] = atomicCounterDecrement(g_dec_counter);" NL "}";
1353		return ss.str();
1354	}
1355
1356	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1357	{
1358		if (m_program != 0)
1359			glDeleteProgram(m_program);
1360		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1361		glLinkProgram(m_program);
1362		if (!CheckProgram(m_program))
1363			return false;
1364
1365		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1366		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1367		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1368		const GLuint kSize   = kWidth * kHeight * kDepth;
1369
1370		if (m_storage_buffer == 0)
1371			glGenBuffers(1, &m_storage_buffer);
1372		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1373		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kSize * 2, NULL, GL_DYNAMIC_DRAW);
1374		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1375
1376		if (m_counter_buffer == 0)
1377			glGenBuffers(1, &m_counter_buffer);
1378
1379		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer);
1380		glBufferData(GL_ATOMIC_COUNTER_BUFFER, 2 * sizeof(GLuint), NULL, GL_STREAM_DRAW);
1381		*static_cast<GLuint*>(glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), GL_MAP_WRITE_BIT)) = 0;
1382		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1383		*static_cast<GLuint*>(
1384			glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), sizeof(GLuint), GL_MAP_WRITE_BIT)) = kSize;
1385		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1386
1387		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
1388
1389		glUseProgram(m_program);
1390		if (dispatch_indirect)
1391		{
1392			if (m_dispatch_buffer == 0)
1393				glGenBuffers(1, &m_dispatch_buffer);
1394			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1395			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1396			glDispatchComputeIndirect(0);
1397		}
1398		else
1399		{
1400			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1401		}
1402
1403		GLuint* data;
1404		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1405		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1406		data = static_cast<GLuint*>(
1407			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kSize, GL_MAP_READ_BIT));
1408
1409		bool ret = true;
1410		for (GLuint i = 0; i < kSize; ++i)
1411		{
1412			if (data[i] != i)
1413			{
1414				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Value at index " << i << " is "
1415													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
1416				ret = false;
1417			}
1418		}
1419		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1420		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1421
1422		GLuint* value;
1423		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer);
1424		value =
1425			static_cast<GLuint*>(glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, 2 * sizeof(GLuint), GL_MAP_READ_BIT));
1426		if (value[0] != kSize)
1427		{
1428			m_context.getTestContext().getLog()
1429				<< tcu::TestLog::Message << "Final atomic counter value (buffer 0, offset 0) is " << value[0]
1430				<< " should be " << kSize << tcu::TestLog::EndMessage;
1431			ret = false;
1432		}
1433		if (value[1] != 0)
1434		{
1435			m_context.getTestContext().getLog()
1436				<< tcu::TestLog::Message << "Final atomic counter value (buffer 0, offset 4) is " << value[1]
1437				<< " should be 0" << tcu::TestLog::EndMessage;
1438			ret = false;
1439		}
1440		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1441		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
1442
1443		return ret;
1444	}
1445
1446	virtual long Setup()
1447	{
1448		m_program		  = 0;
1449		m_storage_buffer  = 0;
1450		m_counter_buffer  = 0;
1451		m_dispatch_buffer = 0;
1452		return NO_ERROR;
1453	}
1454
1455	virtual long Run()
1456	{
1457		if (!RunIteration(uvec3(4, 3, 2), uvec3(2, 3, 4), false))
1458			return ERROR;
1459		if (!RunIteration(uvec3(1, 1, 1), uvec3(1, 1, 1), true))
1460			return ERROR;
1461		if (!RunIteration(uvec3(1, 6, 1), uvec3(1, 1, 8), false))
1462			return ERROR;
1463		if (!RunIteration(uvec3(4, 1, 2), uvec3(10, 3, 4), true))
1464			return ERROR;
1465		return NO_ERROR;
1466	}
1467
1468	virtual long Cleanup()
1469	{
1470		glUseProgram(0);
1471		glDeleteProgram(m_program);
1472		glDeleteBuffers(1, &m_counter_buffer);
1473		glDeleteBuffers(1, &m_dispatch_buffer);
1474		glDeleteBuffers(1, &m_storage_buffer);
1475		return NO_ERROR;
1476	}
1477};
1478
1479class BasicResourceUniform : public ComputeShaderBase
1480{
1481
1482	virtual std::string Title()
1483	{
1484		return "Compute Shader resources - Uniforms";
1485	}
1486
1487	virtual std::string Purpose()
1488	{
1489		return NL "1. Verify that all types of uniform variables work as expected in CS." NL
1490				  "2. Verify that uniform variables can be updated with Uniform* commands.";
1491	}
1492
1493	virtual std::string Method()
1494	{
1495		return NL "1. Create CS which uses all (single precision and integer) types of uniform variables." NL
1496				  "2. Update uniform variables with Uniform* commands." NL
1497				  "3. Verify that uniform variables were updated correctly.";
1498	}
1499
1500	virtual std::string PassCriteria()
1501	{
1502		return "Everything works as expected.";
1503	}
1504
1505	GLuint m_program;
1506	GLuint m_storage_buffer;
1507
1508	virtual long Setup()
1509	{
1510		m_program		 = 0;
1511		m_storage_buffer = 0;
1512		return NO_ERROR;
1513	}
1514
1515	virtual long Run()
1516	{
1517		const char* const glsl_cs = NL
1518			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform float g_0;" NL
1519			"uniform vec2 g_1;" NL "uniform vec3 g_2;" NL "uniform vec4 g_3;" NL "uniform mat2 g_4;" NL
1520			"uniform mat2x3 g_5;" NL "uniform mat2x4 g_6;" NL "uniform mat3x2 g_7;" NL "uniform mat3 g_8;" NL
1521			"uniform mat3x4 g_9;" NL "uniform mat4x2 g_10;" NL "uniform mat4x3 g_11;" NL "uniform mat4 g_12;" NL
1522			"uniform int g_13;" NL "uniform ivec2 g_14;" NL "uniform ivec3 g_15;" NL "uniform ivec4 g_16;" NL
1523			"uniform uint g_17;" NL "uniform uvec2 g_18;" NL "uniform uvec3 g_19;" NL "uniform uvec4 g_20;" NL NL
1524			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0) g_result = 0;" NL
1525			"  if (g_1 != vec2(2.0, 3.0)) g_result = 0;" NL "  if (g_2 != vec3(4.0, 5.0, 6.0)) g_result = 0;" NL
1526			"  if (g_3 != vec4(7.0, 8.0, 9.0, 10.0)) g_result = 0;" NL NL
1527			"  if (g_4 != mat2(11.0, 12.0, 13.0, 14.0)) g_result = 0;" NL
1528			"  if (g_5 != mat2x3(15.0, 16.0, 17.0, 18.0, 19.0, 20.0)) g_result = 0;" NL
1529			"  if (g_6 != mat2x4(21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0)) g_result = 0;" NL NL
1530			"  if (g_7 != mat3x2(29.0, 30.0, 31.0, 32.0, 33.0, 34.0)) g_result = 0;" NL
1531			"  if (g_8 != mat3(35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0)) g_result = 0;" NL
1532			"  if (g_9 != mat3x4(44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0)) g_result = "
1533			"0;" NL NL "  if (g_10 != mat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
1534			"  if (g_11 != mat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
1535			"0;" NL "  if (g_12 != mat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
1536			"88.0, 89.0, 90.0)) g_result = 0;" NL NL "  if (g_13 != 91) g_result = 0;" NL
1537			"  if (g_14 != ivec2(92, 93)) g_result = 0;" NL "  if (g_15 != ivec3(94, 95, 96)) g_result = 0;" NL
1538			"  if (g_16 != ivec4(97, 98, 99, 100)) g_result = 0;" NL NL "  if (g_17 != 101u) g_result = 0;" NL
1539			"  if (g_18 != uvec2(102u, 103u)) g_result = 0;" NL
1540			"  if (g_19 != uvec3(104u, 105u, 106u)) g_result = 0;" NL
1541			"  if (g_20 != uvec4(107u, 108u, 109u, 110u)) g_result = 0;" NL "}";
1542
1543		m_program = CreateComputeProgram(glsl_cs);
1544		glLinkProgram(m_program);
1545		glUseProgram(m_program);
1546		if (!CheckProgram(m_program))
1547			return ERROR;
1548
1549		glGenBuffers(1, &m_storage_buffer);
1550		/* create buffer */
1551		{
1552			const int data = 123;
1553			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1554			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
1555		}
1556
1557		glUniform1f(glGetUniformLocation(m_program, "g_0"), 1.0f);
1558		glUniform2f(glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
1559		glUniform3f(glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
1560		glUniform4f(glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
1561
1562		/* mat2 */
1563		{
1564			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
1565			glUniformMatrix2fv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
1566		}
1567		/* mat2x3 */
1568		{
1569			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
1570			glUniformMatrix2x3fv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
1571		}
1572		/* mat2x4 */
1573		{
1574			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
1575			glUniformMatrix2x4fv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
1576		}
1577
1578		/* mat3x2 */
1579		{
1580			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
1581			glUniformMatrix3x2fv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
1582		}
1583		/* mat3 */
1584		{
1585			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
1586			glUniformMatrix3fv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
1587		}
1588		/* mat3x4 */
1589		{
1590			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
1591										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
1592			glUniformMatrix3x4fv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
1593		}
1594
1595		/* mat4x2 */
1596		{
1597			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
1598			glUniformMatrix4x2fv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
1599		}
1600		/* mat4x3 */
1601		{
1602			const GLfloat value[12] = {
1603				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
1604			};
1605			glUniformMatrix4x3fv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
1606		}
1607		/* mat4 */
1608		{
1609			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
1610										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
1611			glUniformMatrix4fv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
1612		}
1613
1614		glUniform1i(glGetUniformLocation(m_program, "g_13"), 91);
1615		glUniform2i(glGetUniformLocation(m_program, "g_14"), 92, 93);
1616		glUniform3i(glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
1617		glUniform4i(glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
1618
1619		glUniform1ui(glGetUniformLocation(m_program, "g_17"), 101);
1620		glUniform2ui(glGetUniformLocation(m_program, "g_18"), 102, 103);
1621		glUniform3ui(glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
1622		glUniform4ui(glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
1623
1624		glDispatchCompute(1, 1, 1);
1625		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1626
1627		long error = NO_ERROR;
1628		/* validate */
1629		{
1630			int* data;
1631			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
1632			if (data[0] != 1)
1633			{
1634				m_context.getTestContext().getLog()
1635					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
1636				error = ERROR;
1637			}
1638			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1639			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1640		}
1641
1642		return error;
1643	}
1644
1645	virtual long Cleanup()
1646	{
1647		glUseProgram(0);
1648		glDeleteProgram(m_program);
1649		glDeleteBuffers(1, &m_storage_buffer);
1650		return NO_ERROR;
1651	}
1652};
1653
1654class BasicBuiltinVariables : public ComputeShaderBase
1655{
1656
1657	virtual std::string Title()
1658	{
1659		return "CS built-in variables";
1660	}
1661
1662	virtual std::string Purpose()
1663	{
1664		return NL "Verify that all (gl_WorkGroupSize, gl_WorkGroupID, gl_LocalInvocationID," NL
1665				  "gl_GlobalInvocationID, gl_NumWorkGroups, gl_WorkGroupSize)" NL
1666				  "CS built-in variables has correct values.";
1667	}
1668
1669	virtual std::string Method()
1670	{
1671		return NL "1. Create CS which writes all built-in variables to SSBO." NL
1672				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1673				  "3. Verify SSBO content." NL "4. Repeat for several different local and global work sizes.";
1674	}
1675
1676	virtual std::string PassCriteria()
1677	{
1678		return "Everything works as expected.";
1679	}
1680
	/* Compute program; RunIteration deletes and rebuilds it on every call. */
	GLuint m_program;
	/* Shader storage buffer bound at binding 0 that receives the built-in values. */
	GLuint m_storage_buffer;
	/* Buffer holding the work-group counts for glDispatchComputeIndirect. */
	GLuint m_dispatch_buffer;
1684
1685	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1686	{
1687		const uvec3		  global_size = local_size * num_groups;
1688		std::stringstream ss;
1689		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1690		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
1691		   << ", " << global_size.y() << ", " << global_size.z()
1692		   << ");" NL "layout(std430) buffer OutputBuffer {" NL "  uvec4 num_work_groups["
1693		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_size["
1694		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_id["
1695		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_id["
1696		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 global_invocation_id["
1697		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_index["
1698		   << global_size.x() * global_size.y() * global_size.z()
1699		   << "];" NL "} g_out_buffer;" NL "void main() {" NL
1700			  "  if ((gl_WorkGroupSize * gl_WorkGroupID + gl_LocalInvocationID) != gl_GlobalInvocationID) return;" NL
1701			  "  uint global_index = gl_GlobalInvocationID.x +" NL
1702			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
1703			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
1704			  "  g_out_buffer.num_work_groups[global_index] = uvec4(gl_NumWorkGroups, 0);" NL
1705			  "  g_out_buffer.work_group_size[global_index] = uvec4(gl_WorkGroupSize, 0);" NL
1706			  "  g_out_buffer.work_group_id[global_index] = uvec4(gl_WorkGroupID, 0);" NL
1707			  "  g_out_buffer.local_invocation_id[global_index] = uvec4(gl_LocalInvocationID, 0);" NL
1708			  "  g_out_buffer.global_invocation_id[global_index] = uvec4(gl_GlobalInvocationID, 0);" NL
1709			  "  g_out_buffer.local_invocation_index[global_index] = uvec4(gl_LocalInvocationIndex);" NL "}";
1710		return ss.str();
1711	}
1712
1713	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1714	{
1715		if (m_program != 0)
1716			glDeleteProgram(m_program);
1717		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1718		glLinkProgram(m_program);
1719		if (!CheckProgram(m_program))
1720			return false;
1721
1722		const GLuint kBufferSize =
1723			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
1724
1725		std::vector<uvec4> data(kBufferSize * 6);
1726		if (m_storage_buffer == 0)
1727			glGenBuffers(1, &m_storage_buffer);
1728		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1729		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kBufferSize * 6, &data[0], GL_DYNAMIC_DRAW);
1730		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1731
1732		glUseProgram(m_program);
1733		if (dispatch_indirect)
1734		{
1735			if (m_dispatch_buffer == 0)
1736				glGenBuffers(1, &m_dispatch_buffer);
1737			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1738			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1739			glDispatchComputeIndirect(0);
1740		}
1741		else
1742		{
1743			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1744		}
1745
1746		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1747		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1748		uvec4* result;
1749		result = static_cast<uvec4*>(
1750			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * kBufferSize * 6, GL_MAP_READ_BIT));
1751
1752		// gl_NumWorkGroups
1753		for (GLuint index = 0; index < kBufferSize; ++index)
1754		{
1755			if (!IsEqual(result[index], uvec4(num_groups.x(), num_groups.y(), num_groups.z(), 0)))
1756			{
1757				m_context.getTestContext().getLog()
1758					<< tcu::TestLog::Message << "gl_NumWorkGroups: Invalid data at index " << index
1759					<< tcu::TestLog::EndMessage;
1760				return false;
1761			}
1762		}
1763		// gl_WorkGroupSize
1764		for (GLuint index = kBufferSize; index < 2 * kBufferSize; ++index)
1765		{
1766			if (!IsEqual(result[index], uvec4(local_size.x(), local_size.y(), local_size.z(), 0)))
1767			{
1768				m_context.getTestContext().getLog()
1769					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index
1770					<< tcu::TestLog::EndMessage;
1771				return false;
1772			}
1773		}
1774		// gl_WorkGroupID
1775		for (GLuint index = 2 * kBufferSize; index < 3 * kBufferSize; ++index)
1776		{
1777			uvec3 expected = IndexTo3DCoord(index - 2 * kBufferSize, local_size.x() * num_groups.x(),
1778											local_size.y() * num_groups.y());
1779			expected.x() /= local_size.x();
1780			expected.y() /= local_size.y();
1781			expected.z() /= local_size.z();
1782			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
1783			{
1784				m_context.getTestContext().getLog()
1785					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index
1786					<< tcu::TestLog::EndMessage;
1787				return false;
1788			}
1789		}
1790		// gl_LocalInvocationID
1791		for (GLuint index = 3 * kBufferSize; index < 4 * kBufferSize; ++index)
1792		{
1793			uvec3 expected = IndexTo3DCoord(index - 3 * kBufferSize, local_size.x() * num_groups.x(),
1794											local_size.y() * num_groups.y());
1795			expected.x() %= local_size.x();
1796			expected.y() %= local_size.y();
1797			expected.z() %= local_size.z();
1798			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
1799			{
1800				m_context.getTestContext().getLog()
1801					<< tcu::TestLog::Message << "gl_LocalInvocationID: Invalid data at index " << index
1802					<< tcu::TestLog::EndMessage;
1803				return false;
1804			}
1805		}
1806		// gl_GlobalInvocationID
1807		for (GLuint index = 4 * kBufferSize; index < 5 * kBufferSize; ++index)
1808		{
1809			uvec3 expected = IndexTo3DCoord(index - 4 * kBufferSize, local_size.x() * num_groups.x(),
1810											local_size.y() * num_groups.y());
1811			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
1812			{
1813				m_context.getTestContext().getLog()
1814					<< tcu::TestLog::Message << "gl_GlobalInvocationID: Invalid data at index " << index
1815					<< tcu::TestLog::EndMessage;
1816				return false;
1817			}
1818		}
1819		// gl_LocalInvocationIndex
1820		for (GLuint index = 5 * kBufferSize; index < 6 * kBufferSize; ++index)
1821		{
1822			uvec3 coord = IndexTo3DCoord(index - 5 * kBufferSize, local_size.x() * num_groups.x(),
1823										 local_size.y() * num_groups.y());
1824			const GLuint expected = (coord.x() % local_size.x()) + (coord.y() % local_size.y()) * local_size.x() +
1825									(coord.z() % local_size.z()) * local_size.x() * local_size.y();
1826			if (!IsEqual(result[index], uvec4(expected)))
1827			{
1828				m_context.getTestContext().getLog()
1829					<< tcu::TestLog::Message << "gl_LocalInvocationIndex: Invalid data at index " << index
1830					<< tcu::TestLog::EndMessage;
1831				return false;
1832			}
1833		}
1834		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1835		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1836		return true;
1837	}
1838
1839	virtual long Setup()
1840	{
1841		m_program		  = 0;
1842		m_storage_buffer  = 0;
1843		m_dispatch_buffer = 0;
1844		return NO_ERROR;
1845	}
1846
1847	virtual long Run()
1848	{
1849		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
1850			return ERROR;
1851		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
1852			return ERROR;
1853		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
1854			return ERROR;
1855		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
1856			return ERROR;
1857		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
1858			return ERROR;
1859		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
1860			return ERROR;
1861		return NO_ERROR;
1862	}
1863
1864	virtual long Cleanup()
1865	{
1866		glUseProgram(0);
1867		glDeleteProgram(m_program);
1868		glDeleteBuffers(1, &m_storage_buffer);
1869		glDeleteBuffers(1, &m_dispatch_buffer);
1870		return NO_ERROR;
1871	}
1872};
1873
1874class BasicMax : public ComputeShaderBase
1875{
1876
1877	virtual std::string Title()
1878	{
1879		return NL "CS max values";
1880	}
1881
1882	virtual std::string Purpose()
1883	{
1884		return NL "Verify (on the API and GLSL side) that all GL_MAX_COMPUTE_* values are not less than" NL
1885				  "required by the OpenGL specification.";
1886	}
1887
1888	virtual std::string Method()
1889	{
1890		return NL "1. Use all API commands to query all GL_MAX_COMPUTE_* values. Verify that they are correct." NL
1891				  "2. Verify all gl_MaxCompute* constants in the GLSL.";
1892	}
1893
1894	virtual std::string PassCriteria()
1895	{
1896		return NL "Everything works as expected.";
1897	}
1898
1899	GLuint m_program;
1900	GLuint m_buffer;
1901
1902	bool CheckIndexed(GLenum target, const GLint* min_values)
1903	{
1904		GLint   i;
1905		GLint64 i64;
1906
1907		for (GLuint c = 0; c < 3; c++)
1908		{
1909			glGetIntegeri_v(target, c, &i);
1910			if (i < min_values[c])
1911			{
1912				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
1913													<< min_values[c] << tcu::TestLog::EndMessage;
1914				return false;
1915			}
1916		}
1917		for (GLuint c = 0; c < 3; c++)
1918		{
1919			glGetInteger64i_v(target, c, &i64);
1920			if (static_cast<GLint>(i64) < min_values[c])
1921			{
1922				m_context.getTestContext().getLog()
1923					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least "
1924					<< min_values[c] << tcu::TestLog::EndMessage;
1925				return false;
1926			}
1927		}
1928
1929		return true;
1930	}
1931
1932	bool Check(GLenum target, const GLint min_value)
1933	{
1934		GLint	 i;
1935		GLint64   i64;
1936		GLfloat   f;
1937		GLboolean b;
1938
1939		glGetIntegerv(target, &i);
1940		if (i < min_value)
1941		{
1942			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
1943												<< min_value << tcu::TestLog::EndMessage;
1944			return false;
1945		}
1946		glGetInteger64v(target, &i64);
1947		if (static_cast<GLint>(i64) < min_value)
1948		{
1949			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << static_cast<GLint>(i64)
1950												<< " should be at least " << min_value << tcu::TestLog::EndMessage;
1951			return false;
1952		}
1953		glGetFloatv(target, &f);
1954		if (static_cast<GLint>(f) < min_value)
1955		{
1956			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << static_cast<GLint>(f)
1957												<< " should be at least " << min_value << tcu::TestLog::EndMessage;
1958			return false;
1959		}
1960		glGetBooleanv(target, &b);
1961		if (b == GL_FALSE)
1962		{
1963			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is GL_FALSE should be at least GL_TRUE."
1964												<< min_value << tcu::TestLog::EndMessage;
1965			return false;
1966		}
1967
1968		return true;
1969	}
1970
1971	virtual long Setup()
1972	{
1973		m_program = 0;
1974		m_buffer  = 0;
1975		return NO_ERROR;
1976	}
1977
1978	virtual long Run()
1979	{
1980		const GLint work_group_count[3] = { 65535, 65535, 65535 };
1981		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_COUNT, work_group_count))
1982			return ERROR;
1983
1984		const GLint work_group_size[3] = { 128, 128, 64 };
1985		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_SIZE, work_group_size))
1986			return ERROR;
1987
1988		if (!Check(GL_MAX_COMPUTE_UNIFORM_BLOCKS, 12))
1989			return ERROR;
1990		if (!Check(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, 16))
1991			return ERROR;
1992		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, 1))
1993			return ERROR;
1994		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTERS, 8))
1995			return ERROR;
1996		if (!Check(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, 16384))
1997			return ERROR;
1998		if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 512))
1999			return ERROR;
2000		if (!Check(GL_MAX_COMPUTE_IMAGE_UNIFORMS, 4))
2001			return ERROR;
2002		if (!Check(GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS, 512))
2003			return ERROR;
2004
2005		const char* const glsl_cs =
2006			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
2007			   "uniform ivec3 MaxComputeWorkGroupCount;" NL "uniform ivec3 MaxComputeWorkGroupSize;" NL
2008			   "uniform int MaxComputeUniformComponents;" NL "uniform int MaxComputeTextureImageUnits;" NL
2009			   "uniform int MaxComputeImageUniforms;" NL "uniform int MaxComputeAtomicCounters;" NL
2010			   "uniform int MaxComputeAtomicCounterBuffers;" NL "void main() {" NL "  g_output = 1;" NL
2011			   "  if (MaxComputeWorkGroupCount != gl_MaxComputeWorkGroupCount) g_output = 0;" NL
2012			   "  if (MaxComputeWorkGroupSize != gl_MaxComputeWorkGroupSize) g_output = 0;" NL
2013			   "  if (MaxComputeUniformComponents != gl_MaxComputeUniformComponents) g_output = 0;" NL
2014			   "  if (MaxComputeTextureImageUnits != gl_MaxComputeTextureImageUnits) g_output = 0;" NL
2015			   "  if (MaxComputeImageUniforms != gl_MaxComputeImageUniforms) g_output = 0;" NL
2016			   "  if (MaxComputeAtomicCounters != gl_MaxComputeAtomicCounters) g_output = 0;" NL
2017			   "  if (MaxComputeAtomicCounterBuffers != gl_MaxComputeAtomicCounterBuffers) g_output = 0;" NL "}";
2018		m_program = CreateComputeProgram(glsl_cs);
2019		glLinkProgram(m_program);
2020		if (!CheckProgram(m_program))
2021			return ERROR;
2022		glUseProgram(m_program);
2023
2024		GLint p[3];
2025		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &p[0]);
2026		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &p[1]);
2027		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &p[2]);
2028		glUniform3i(glGetUniformLocation(m_program, "MaxComputeWorkGroupCount"), p[0], p[1], p[2]);
2029
2030		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &p[0]);
2031		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &p[1]);
2032		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &p[2]);
2033		glUniform3iv(glGetUniformLocation(m_program, "MaxComputeWorkGroupSize"), 1, p);
2034
2035		glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, p);
2036		glUniform1i(glGetUniformLocation(m_program, "MaxComputeUniformComponents"), p[0]);
2037
2038		glGetIntegerv(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, p);
2039		glUniform1iv(glGetUniformLocation(m_program, "MaxComputeTextureImageUnits"), 1, p);
2040
2041		glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, p);
2042		glUniform1i(glGetUniformLocation(m_program, "MaxComputeImageUniforms"), p[0]);
2043
2044		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, p);
2045		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounters"), p[0]);
2046
2047		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, p);
2048		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounterBuffers"), p[0]);
2049
2050		GLint data = 0xffff;
2051		glGenBuffers(1, &m_buffer);
2052		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
2053		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint), &data, GL_DYNAMIC_DRAW);
2054
2055		glDispatchCompute(1, 1, 1);
2056
2057		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2058
2059		GLint* result;
2060		result	 = static_cast<GLint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), GL_MAP_READ_BIT));
2061		long error = NO_ERROR;
2062		if (result[0] != 1)
2063		{
2064			error = ERROR;
2065		}
2066
2067		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2068		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2069		return error;
2070	}
2071	virtual long Cleanup()
2072	{
2073		glUseProgram(0);
2074		glDeleteProgram(m_program);
2075		glDeleteBuffers(1, &m_buffer);
2076		return NO_ERROR;
2077	}
2078};
2079
2080class NegativeAttachShader : public ComputeShaderBase
2081{
2082
2083	virtual std::string Title()
2084	{
2085		return "Api Attach Shader";
2086	}
2087
2088	virtual std::string Purpose()
2089	{
2090		return NL "Verify that calling AttachShader with multiple shader objects of type COMPUTE_SHADER generates "
2091				  "INVALID_OPERATION.";
2092	}
2093
2094	virtual std::string Method()
2095	{
2096		return NL "Try to attach multiple shader objects of the same type and verify that proper error is generated.";
2097	}
2098
2099	virtual std::string PassCriteria()
2100	{
2101		return "INVALID_OPERATION is generated.";
2102	}
2103
2104	virtual long Run()
2105	{
2106		const char* const cs1[2] = { "#version 310 es", NL "layout(local_size_x = 1) in;" NL "void Run();" NL
2107														   "void main() {" NL "  Run();" NL "}" };
2108
2109		const char* const cs2 =
2110			"#version 310 es" NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2111			"vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}";
2112
2113		const char* const cs3 =
2114			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 0, std430) buffer Output {" NL
2115			"  vec4 g_output;" NL "};" NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL
2116			"  return vec4(1, 2, 3, 4);" NL "}";
2117
2118		const GLuint sh1 = glCreateShader(GL_COMPUTE_SHADER);
2119
2120		GLint type;
2121		glGetShaderiv(sh1, GL_SHADER_TYPE, &type);
2122		if (static_cast<GLenum>(type) != GL_COMPUTE_SHADER)
2123		{
2124			m_context.getTestContext().getLog()
2125				<< tcu::TestLog::Message << "SHADER_TYPE should be COMPUTE_SHADER." << tcu::TestLog::EndMessage;
2126			glDeleteShader(sh1);
2127			return false;
2128		}
2129
2130		glShaderSource(sh1, 2, cs1, NULL);
2131		glCompileShader(sh1);
2132
2133		const GLuint sh2 = glCreateShader(GL_COMPUTE_SHADER);
2134		glShaderSource(sh2, 1, &cs2, NULL);
2135		glCompileShader(sh2);
2136
2137		const GLuint sh3 = glCreateShader(GL_COMPUTE_SHADER);
2138		glShaderSource(sh3, 1, &cs3, NULL);
2139		glCompileShader(sh3);
2140
2141		const GLuint p = glCreateProgram();
2142		glAttachShader(p, sh1);
2143		glAttachShader(p, sh2);
2144		if (glGetError() != GL_INVALID_OPERATION)
2145		{
2146			m_context.getTestContext().getLog()
2147				<< tcu::TestLog::Message
2148				<< "GL_INVALID_OPERATION error expected after attaching shader of the same type."
2149				<< tcu::TestLog::EndMessage;
2150			return ERROR;
2151		}
2152		glAttachShader(p, sh3);
2153		if (glGetError() != GL_INVALID_OPERATION)
2154		{
2155			m_context.getTestContext().getLog()
2156				<< tcu::TestLog::Message
2157				<< "GL_INVALID_OPERATION error expected after attaching shader of the same type."
2158				<< tcu::TestLog::EndMessage;
2159			return ERROR;
2160		}
2161
2162		glDeleteShader(sh1);
2163		glDeleteShader(sh2);
2164		glDeleteShader(sh3);
2165
2166		glUseProgram(0);
2167		glDeleteProgram(p);
2168
2169		return NO_ERROR;
2170	}
2171};
2172
2173class BasicBuildSeparable : public ComputeShaderBase
2174{
2175
2176	virtual std::string Title()
2177	{
2178		return "Building CS separable program";
2179	}
2180
2181	virtual std::string Purpose()
2182	{
2183		return NL "1. Verify that building separable CS program works as expected." NL
2184				  "2. Verify that program consisting from 4 strings works as expected.";
2185	}
2186
2187	virtual std::string Method()
2188	{
2189		return NL "1. Create, compile and link CS using CreateShaderProgramv command." NL
2190				  "2. Dispatch and verify CS program.";
2191	}
2192
2193	virtual std::string PassCriteria()
2194	{
2195		return "Everything works as expected.";
2196	}
2197
2198	virtual long Run()
2199	{
2200		const char* const cs[4] = {
2201			"#version 310 es",
2202
2203			NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
2204
2205			NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2206			   "vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}",
2207
2208			NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL "  return vec4(1, 2, 3, 4);" NL "}"
2209		};
2210
2211		const GLuint p   = glCreateShaderProgramv(GL_COMPUTE_SHADER, 4, cs);
2212		bool		 res = CheckProgram(p);
2213
2214		GLuint buffer;
2215		glGenBuffers(1, &buffer);
2216		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
2217		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
2218		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2219
2220		glUseProgram(p);
2221		glDispatchCompute(1, 1, 1);
2222
2223		vec4* data;
2224		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2225		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2226		data = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
2227		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2228		{
2229			m_context.getTestContext().getLog()
2230				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2231			res = false;
2232		}
2233		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2234
2235		glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &vec4(0.0f)[0]);
2236
2237		GLuint pipeline;
2238		glGenProgramPipelines(1, &pipeline);
2239		glUseProgramStages(pipeline, GL_COMPUTE_SHADER_BIT, p);
2240
2241		glUseProgram(0);
2242		glBindProgramPipeline(pipeline);
2243		glDispatchCompute(1, 1, 1);
2244
2245		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2246		data = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
2247
2248		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2249		{
2250			m_context.getTestContext().getLog()
2251				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2252			res = false;
2253		}
2254
2255		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2256		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2257		glDeleteProgramPipelines(1, &pipeline);
2258		glDeleteBuffers(1, &buffer);
2259		glDeleteProgram(p);
2260
2261		return res == true ? NO_ERROR : ERROR;
2262	}
2263};
2264
2265class BasicSharedSimple : public ComputeShaderBase
2266{
2267	virtual std::string Title()
2268	{
2269		return "Shared Memory - simple usage";
2270	}
2271
2272	virtual std::string Purpose()
2273	{
2274		return NL "1. Verify that shared array of uints works as expected." NL
2275				  "2. Verify that shared memory written by one invocation is observable by other invocations" NL
2276				  "    when groupMemoryBarrier() and barrier() built-in functions are used.";
2277	}
2278
2279	virtual std::string Method()
2280	{
2281		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2282				  "2. Verify results written by CS to SSBO." NL
2283				  "3. Repeat for several different number of work groups.";
2284	}
2285
2286	virtual std::string PassCriteria()
2287	{
2288		return "Everything works as expected.";
2289	}
2290
2291	GLuint m_program;
2292	GLuint m_storage_buffer;
2293	GLuint m_dispatch_buffer;
2294
2295	bool RunIteration(const GLuint num_groups, bool dispatch_indirect)
2296	{
2297		const GLuint kBufferSize = 128 * num_groups;
2298
2299		std::vector<GLuint> data(kBufferSize, 0xffff);
2300		if (m_storage_buffer == 0)
2301			glGenBuffers(1, &m_storage_buffer);
2302		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2303		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2304		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2305
2306		glUseProgram(m_program);
2307		if (dispatch_indirect)
2308		{
2309			const GLuint groups[3] = { num_groups, 1, 1 };
2310			if (m_dispatch_buffer == 0)
2311				glGenBuffers(1, &m_dispatch_buffer);
2312			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2313			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2314			glDispatchComputeIndirect(0);
2315		}
2316		else
2317		{
2318			glDispatchCompute(num_groups, 1, 1);
2319		}
2320
2321		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2322		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2323		GLuint* result;
2324		result = static_cast<GLuint*>(
2325			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
2326		bool res = true;
2327		for (GLuint i = 0; i < kBufferSize; ++i)
2328		{
2329			if (result[i] != 1)
2330			{
2331				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2332													<< result[i] << " should be 1." << tcu::TestLog::EndMessage;
2333				res = false;
2334			}
2335		}
2336		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2337		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2338		return res;
2339	}
2340
2341	virtual long Setup()
2342	{
2343		m_program		  = 0;
2344		m_storage_buffer  = 0;
2345		m_dispatch_buffer = 0;
2346		return NO_ERROR;
2347	}
2348
2349	virtual long Run()
2350	{
2351		const char* const glsl_cs =
2352			NL "layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2353			   "shared uint g_shared_data[128];" NL "void main() {" NL
2354			   "  g_shared_data[gl_LocalInvocationID.x] = gl_LocalInvocationIndex;" NL
2355			   "  groupMemoryBarrier();" // flush memory stores
2356			NL "  barrier();"			 // wait for all stores to finish
2357			NL "  g_output[gl_GlobalInvocationID.x] = 1u;" NL "  if (gl_LocalInvocationIndex < 127u) {" NL
2358			   "    uint res = g_shared_data[gl_LocalInvocationID.x + "
2359			   "1u];" // load data from shared memory filled by other thread
2360			NL "    if (res != (gl_LocalInvocationIndex + 1u)) {" NL "      g_output[gl_GlobalInvocationID.x] = 0u;" NL
2361			   "    }" NL "  }" NL "}";
2362		m_program = CreateComputeProgram(glsl_cs);
2363		glLinkProgram(m_program);
2364		if (!CheckProgram(m_program))
2365			return ERROR;
2366
2367		if (!RunIteration(1, false))
2368			return ERROR;
2369		if (!RunIteration(8, true))
2370			return ERROR;
2371		if (!RunIteration(13, false))
2372			return ERROR;
2373		if (!RunIteration(7, true))
2374			return ERROR;
2375		return NO_ERROR;
2376	}
2377	virtual long Cleanup()
2378	{
2379		glUseProgram(0);
2380		glDeleteProgram(m_program);
2381		glDeleteBuffers(1, &m_storage_buffer);
2382		glDeleteBuffers(1, &m_dispatch_buffer);
2383		return NO_ERROR;
2384	}
2385};
2386
2387class BasicSharedStruct : public ComputeShaderBase
2388{
2389	virtual std::string Title()
2390	{
2391		return "Shared Memory - arrays and structers";
2392	}
2393
2394	virtual std::string Purpose()
2395	{
2396		return NL "1. Verify that vectors, matrices, structers and arrays of those can be used" NL
2397				  "    as a shared memory." NL
2398				  "2. Verify that shared memory can be indexed with constant values, built-in" NL
2399				  "    variables and dynamic expressions." NL
2400				  "3. Verify that memoryBarrierAtomicCounter(), memoryBarrierImage(), memoryBarrier()," NL
2401				  "     memoryBarrierBuffer() and memoryBarrierShared() built-in functions are accepted" NL
2402				  "     by the GLSL compiler.";
2403	}
2404
2405	virtual std::string Method()
2406	{
2407		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2408				  "2. Verify results written by CS to SSBO.";
2409	}
2410
2411	virtual std::string PassCriteria()
2412	{
2413		return "Everything works as expected.";
2414	}
2415
2416	GLuint m_program;
2417	GLuint m_storage_buffer;
2418	GLuint m_dispatch_buffer;
2419
2420	bool RunIteration(bool dispatch_indirect)
2421	{
2422		const GLuint kBufferSize = 256;
2423
2424		std::vector<vec4> data(kBufferSize);
2425		if (m_storage_buffer == 0)
2426			glGenBuffers(1, &m_storage_buffer);
2427		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2428		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2429		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2430
2431		glUseProgram(m_program);
2432		if (dispatch_indirect)
2433		{
2434			const GLuint groups[3] = { 1, 1, 1 };
2435			if (m_dispatch_buffer == 0)
2436				glGenBuffers(1, &m_dispatch_buffer);
2437			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2438			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2439			glDispatchComputeIndirect(0);
2440		}
2441		else
2442		{
2443			glDispatchCompute(1, 1, 1);
2444		}
2445
2446		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2447		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2448		vec4* result;
2449		result = static_cast<vec4*>(
2450			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize, GL_MAP_READ_BIT));
2451		bool res = true;
2452		for (GLuint i = 0; i < kBufferSize; ++i)
2453		{
2454			if (!IsEqual(result[i], vec4(static_cast<float>(i))))
2455			{
2456				m_context.getTestContext().getLog()
2457					<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
2458				res = false;
2459			}
2460		}
2461		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2462		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2463		return res;
2464	}
2465
2466	virtual long Setup()
2467	{
2468		m_program		  = 0;
2469		m_storage_buffer  = 0;
2470		m_dispatch_buffer = 0;
2471		return NO_ERROR;
2472	}
2473
2474	virtual long Run()
2475	{
2476		const char* const glsl_cs = NL
2477			"layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[256];" NL "};" NL
2478			"struct SubData {" NL "  mat2x4 data;" NL "};" NL "struct Data {" NL "  uint index;" NL "  vec3 data0;" NL
2479			"  SubData data1;" NL "};" NL "shared Data g_shared_data[256];" NL "shared int g_shared_buf[2];" NL
2480			"void main() {" NL "  if (gl_LocalInvocationID.x == 0u) {" NL "    g_shared_buf[1] = 1;" NL
2481			"    g_shared_buf[1u + gl_LocalInvocationID.x] = 0;" NL "    g_shared_buf[0] = 128;" NL
2482			"    g_output[0] = vec4(g_shared_buf[1]);" NL "    g_output[128] = vec4(g_shared_buf[0]);" NL
2483			"    memoryBarrierBuffer();" // note: this call is not needed here, just check if compiler accepts it
2484			NL "  } else {" NL "    uint index = gl_LocalInvocationIndex;" NL
2485			"    g_shared_data[index].index = index;" NL "    g_shared_data[index + 128u].index = index + 128u;" NL
2486			"    g_shared_data[index].data1.data = mat2x4(0.0);" NL
2487			"    g_shared_data[index + 128u].data1.data = mat2x4(0.0);" NL
2488			"    g_output[index] = vec4(g_shared_data[index].index);" // load data from shared memory
2489			NL "    g_output[index + 128u] = vec4(g_shared_data[index + 128u].index);" NL
2490			"    memoryBarrierShared();" // note: this call is not needed here, just check if compiler accepts it
2491			NL "  }" NL "  memoryBarrierAtomicCounter();" NL "  memoryBarrierImage();" NL
2492			"  memoryBarrier();" // note: these calls are not needed here, just check if compiler accepts them
2493			NL "}";
2494		m_program = CreateComputeProgram(glsl_cs);
2495		glLinkProgram(m_program);
2496		if (!CheckProgram(m_program))
2497			return ERROR;
2498
2499		if (!RunIteration(false))
2500			return ERROR;
2501		if (!RunIteration(true))
2502			return ERROR;
2503		return NO_ERROR;
2504	}
2505
2506	virtual long Cleanup()
2507	{
2508		glUseProgram(0);
2509		glDeleteProgram(m_program);
2510		glDeleteBuffers(1, &m_storage_buffer);
2511		glDeleteBuffers(1, &m_dispatch_buffer);
2512		return NO_ERROR;
2513	}
2514};
2515
2516class BasicDispatchIndirect : public ComputeShaderBase
2517{
2518	virtual std::string Title()
2519	{
2520		return NL "DispatchComputeIndirect command";
2521	}
2522
2523	virtual std::string Purpose()
2524	{
2525		return NL
2526			"1. Verify that DispatchComputeIndirect command works as described in the OpenGL specification." NL
2527			"2. Verify that <offset> parameter is correctly applied." NL
2528			"3. Verify that updating dispatch buffer with different methods (BufferData, BufferSubData, MapBuffer)" NL
2529			"    just before DispatchComputeIndirect call works as expected." NL
2530			"4. Verify that GL_DISPATCH_INDIRECT_BUFFER_BINDING binding point is set correctly.";
2531	}
2532
2533	virtual std::string Method()
2534	{
2535		return NL
2536			"1. Create CS and dispatch indirect buffer." NL "2. Dispatch CS with DispatchComputeIndirect command." NL
2537			"3. Update dispatch indirect buffer." NL
2538			"4. Repeat several times updating dispatch buffer with different methods and changing <offset> parameter.";
2539	}
2540
2541	virtual std::string PassCriteria()
2542	{
2543		return NL "Everything works as expected.";
2544	}
2545
2546	GLuint m_program;
2547	GLuint m_storage_buffer;
2548	GLuint m_dispatch_buffer[2];
2549
2550	bool RunIteration(GLintptr offset, GLuint buffer_size)
2551	{
2552		std::vector<GLuint> data(buffer_size);
2553		if (m_storage_buffer == 0)
2554			glGenBuffers(1, &m_storage_buffer);
2555		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2556		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * buffer_size, &data[0], GL_DYNAMIC_DRAW);
2557
2558		glDispatchComputeIndirect(offset);
2559
2560		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2561		GLuint* result;
2562		result = static_cast<GLuint*>(
2563			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * buffer_size, GL_MAP_READ_BIT));
2564		bool res = true;
2565		for (GLuint i = 0; i < buffer_size; ++i)
2566		{
2567			if (result[i] != i)
2568			{
2569				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2570													<< result[i] << " should be " << i << tcu::TestLog::EndMessage;
2571				res = false;
2572			}
2573		}
2574		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2575		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2576		return res;
2577	}
2578
2579	bool CheckBinding(GLuint expected)
2580	{
2581		GLint	 i;
2582		GLint64   i64;
2583		GLfloat   f;
2584		GLboolean b;
2585
2586		GLfloat expectedFloat = static_cast<GLfloat>(expected);
2587
2588		glGetIntegerv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i);
2589		if (static_cast<GLuint>(i) != expected)
2590		{
2591			return false;
2592		}
2593		glGetInteger64v(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i64);
2594		if (static_cast<GLuint>(i64) != expected)
2595		{
2596			return false;
2597		}
2598		glGetFloatv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &f);
2599		if (f != expectedFloat)
2600		{
2601			return false;
2602		}
2603		glGetBooleanv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &b);
2604		if (b != (expected != 0 ? GL_TRUE : GL_FALSE))
2605		{
2606			return false;
2607		}
2608
2609		return true;
2610	}
2611
2612	virtual long Setup()
2613	{
2614		m_program		 = 0;
2615		m_storage_buffer = 0;
2616		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
2617		return NO_ERROR;
2618	}
2619
2620	virtual long Run()
2621	{
2622		const char* const glsl_cs =
2623			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2624			   "uniform uvec3 g_global_size;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
2625			   "                      gl_GlobalInvocationID.y * g_global_size.x +" NL
2626			   "                      gl_GlobalInvocationID.z * g_global_size.x * g_global_size.y;" NL
2627			   "  if (gl_NumWorkGroups != g_global_size) {" NL "    g_output[global_index] = 0xffffu;" NL
2628			   "    return;" NL "  }" NL "  g_output[global_index] = global_index;" NL "}";
2629		m_program = CreateComputeProgram(glsl_cs);
2630		glLinkProgram(m_program);
2631		if (!CheckProgram(m_program))
2632			return ERROR;
2633
2634		if (!CheckBinding(0))
2635			return ERROR;
2636
2637		glGenBuffers(2, m_dispatch_buffer);
2638
2639		const GLuint data[]  = { 1, 2, 3, 4, 5, 6, 7, 8 };
2640		const GLuint data2[] = { 3, 1, 4, 4 };
2641
2642		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
2643		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STREAM_DRAW);
2644		if (!CheckBinding(m_dispatch_buffer[0]))
2645			return ERROR;
2646
2647		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
2648		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data2), data2, GL_STREAM_READ);
2649		if (!CheckBinding(m_dispatch_buffer[1]))
2650			return ERROR;
2651
2652		glUseProgram(m_program);
2653		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
2654
2655		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
2656		if (!RunIteration(0, 6))
2657			return ERROR;
2658
2659		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 2, 3, 4);
2660		if (!RunIteration(4, 24))
2661			return ERROR;
2662
2663		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 5, 6);
2664		if (!RunIteration(12, 120))
2665			return ERROR;
2666
2667		glBufferSubData(GL_DISPATCH_INDIRECT_BUFFER, 20, 12, data);
2668		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
2669		if (!RunIteration(20, 6))
2670			return ERROR;
2671
2672		GLuint* ptr = static_cast<GLuint*>(
2673			glMapBufferRange(GL_DISPATCH_INDIRECT_BUFFER, 0, sizeof(GLuint) * 4, GL_MAP_WRITE_BIT));
2674		*ptr++ = 4;
2675		*ptr++ = 4;
2676		*ptr++ = 4;
2677		glUnmapBuffer(GL_DISPATCH_INDIRECT_BUFFER);
2678
2679		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 4, 4);
2680		if (!RunIteration(0, 64))
2681			return ERROR;
2682
2683		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
2684
2685		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 4, 4);
2686		if (!RunIteration(4, 16))
2687			return ERROR;
2688
2689		glDeleteBuffers(2, m_dispatch_buffer);
2690		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
2691
2692		if (!CheckBinding(0))
2693			return ERROR;
2694
2695		return NO_ERROR;
2696	}
2697	virtual long Cleanup()
2698	{
2699		glUseProgram(0);
2700		glDeleteProgram(m_program);
2701		glDeleteBuffers(1, &m_storage_buffer);
2702		glDeleteBuffers(2, m_dispatch_buffer);
2703		return NO_ERROR;
2704	}
2705};
2706
2707class BasicSSOComputePipeline : public ComputeShaderBase
2708{
2709	virtual std::string Title()
2710	{
2711		return NL "Separable CS Programs - Compute and non-compute stages (1)";
2712	}
2713	virtual std::string Purpose()
2714	{
2715		return NL "1. Verify that compute and non-compute stages can be attached to one pipeline object." NL
2716				  "2. Verify that DrawArrays and ComputeDispatch commands works as expected in this case.";
2717	}
2718	virtual std::string Method()
2719	{
2720		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
2721				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
2722				  "4. Issue MemoryBarrier command." NL
2723				  "5. Issue DrawArrays command which uses data written by the compute stage." NL "6. Verify result.";
2724	}
2725	virtual std::string PassCriteria()
2726	{
2727		return NL "Everything works as expected.";
2728	}
2729
2730	GLuint m_vsp, m_fsp, m_csp;
2731	GLuint m_storage_buffer;
2732	GLuint m_vertex_array;
2733	GLuint m_pipeline;
2734
2735	virtual long Setup()
2736	{
2737		m_vsp = m_fsp = m_csp = 0;
2738		m_storage_buffer	  = 0;
2739		m_vertex_array		  = 0;
2740		m_pipeline			  = 0;
2741		return NO_ERROR;
2742	}
2743	virtual long Run()
2744	{
2745		const char* const glsl_cs = NL
2746			"layout(local_size_x = 4) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[4];" NL "};" NL
2747			"void main() {" NL "  const vec2 quad[4] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL
2748			"  g_output[gl_GlobalInvocationID.x] = vec4(quad[gl_GlobalInvocationID.x], 0, 1);" NL "}";
2749		m_csp = CreateComputeProgram(glsl_cs);
2750		glProgramParameteri(m_csp, GL_PROGRAM_SEPARABLE, GL_TRUE);
2751		glLinkProgram(m_csp);
2752		if (!CheckProgram(m_csp))
2753			return ERROR;
2754
2755		const char* const glsl_vs =
2756			NL "layout(location = 0) in vec4 i_position;" NL "void main() {" NL "  gl_Position = i_position;" NL "}";
2757		m_vsp = BuildShaderProgram(GL_VERTEX_SHADER, glsl_vs);
2758		if (!CheckProgram(m_vsp))
2759			return ERROR;
2760
2761		const char* const glsl_fs = NL "layout(location = 0) out mediump vec4 o_color;" NL "void main() {" NL
2762									   "  o_color = vec4(0, 1, 0, 1);" NL "}";
2763		m_fsp = BuildShaderProgram(GL_FRAGMENT_SHADER, glsl_fs);
2764		if (!CheckProgram(m_fsp))
2765			return ERROR;
2766
2767		glGenProgramPipelines(1, &m_pipeline);
2768		glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, m_vsp);
2769		glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, m_fsp);
2770		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_csp);
2771
2772		glGenBuffers(1, &m_storage_buffer);
2773		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2774		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * 4, NULL, GL_DYNAMIC_DRAW);
2775		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2776
2777		glGenVertexArrays(1, &m_vertex_array);
2778		glBindVertexArray(m_vertex_array);
2779		glBindBuffer(GL_ARRAY_BUFFER, m_storage_buffer);
2780		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0);
2781		glBindBuffer(GL_ARRAY_BUFFER, 0);
2782		glEnableVertexAttribArray(0);
2783		glBindVertexArray(0);
2784
2785		glBindProgramPipeline(m_pipeline);
2786		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2787		glDispatchCompute(1, 1, 1);
2788
2789		glClear(GL_COLOR_BUFFER_BIT);
2790		glBindVertexArray(m_vertex_array);
2791		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
2792		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
2793
2794		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
2795			return ERROR;
2796		return NO_ERROR;
2797	}
2798
2799	virtual long Cleanup()
2800	{
2801		glDeleteProgram(m_vsp);
2802		glDeleteProgram(m_fsp);
2803		glDeleteProgram(m_csp);
2804		glDeleteBuffers(1, &m_storage_buffer);
2805		glDeleteVertexArrays(1, &m_vertex_array);
2806		glDeleteProgramPipelines(1, &m_pipeline);
2807		return NO_ERROR;
2808	}
2809};
2810
2811class BasicSSOCase2 : public ComputeShaderBase
2812{
2813	virtual std::string Title()
2814	{
2815		return NL "Separable CS Programs - Compute and non-compute stages (2)";
2816	}
2817	virtual std::string Purpose()
2818	{
2819		return NL "1. Verify that data computed by the compute stage is visible to non-compute stage after "
2820				  "MemoryBarrier command." NL "2. Verify that ProgramParameteri(program, GL_PROGRAM_SEPARABLE, "
2821				  "GL_TRUE) command works correctly for CS." NL
2822				  "3. Verify that gl_WorkGroupSize built-in variable is a contant and can be used as an array size.";
2823	}
2824	virtual std::string Method()
2825	{
2826		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
2827				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
2828				  "4. Issue MemoryBarrier command." NL
2829				  "5. Issue DrawArrays command which uses data written to the buffer object by the compute stage." NL
2830				  "6. Verify result.";
2831	}
2832	virtual std::string PassCriteria()
2833	{
2834		return NL "Everything works as expected.";
2835	}
2836
2837	GLuint m_program_ab;
2838	GLuint m_program_c;
2839	GLuint m_pipeline;
2840	GLuint m_storage_buffer;
2841	GLuint m_vao;
2842
2843	virtual long Setup()
2844	{
2845		m_program_ab	 = 0;
2846		m_program_c		 = 0;
2847		m_pipeline		 = 0;
2848		m_storage_buffer = 0;
2849		m_vao			 = 0;
2850		return NO_ERROR;
2851	}
2852	virtual long Run()
2853	{
2854		GLint res;
2855		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
2856		if (res <= 0)
2857		{
2858			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
2859			return NO_ERROR;
2860		}
2861
2862		const char* const glsl_a =
2863			"#version 310 es" NL "layout(binding = 1, std430) buffer Input {" NL "  mediump vec2 g_input[4];" NL "};" NL
2864			"flat out mediump vec3 color;" NL "void main() {" NL
2865			"  gl_Position = vec4(g_input[gl_VertexID], 0.0, 1.0);" NL "  color = vec3(0.0, 1.0, 0.0);" NL "}";
2866		const char* const glsl_b =
2867			"#version 310 es" NL "flat in mediump vec3 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL
2868			"void main() {" NL "  g_color = vec4(color, 1.0);" NL "}";
2869		const char* const glsl_c =
2870			"#version 310 es" NL "layout(local_size_x = 4) in;" NL "layout(binding = 1, std430) buffer Output {" NL
2871			"  vec2 g_output[gl_WorkGroupSize.x];" NL "};" NL "void main() {" NL
2872			"  if (gl_GlobalInvocationID.x == 0u) {" NL "    g_output[0] = vec2(-0.8, -0.8);" NL
2873			"  } else if (gl_GlobalInvocationID.x == 1u) {" NL "    g_output[1] = vec2(0.8, -0.8);" NL
2874			"  } else if (gl_GlobalInvocationID.x == 2u) {" NL "    g_output[2] = vec2(-0.8, 0.8);" NL
2875			"  } else if (gl_GlobalInvocationID.x == 3u) {" NL "    g_output[3] = vec2(0.8, 0.8);" NL "  }" NL "}";
2876
2877		m_program_ab = glCreateProgram();
2878		GLuint sh	= glCreateShader(GL_VERTEX_SHADER);
2879		glAttachShader(m_program_ab, sh);
2880		glDeleteShader(sh);
2881		glShaderSource(sh, 1, &glsl_a, NULL);
2882		glCompileShader(sh);
2883
2884		sh = glCreateShader(GL_FRAGMENT_SHADER);
2885		glAttachShader(m_program_ab, sh);
2886		glDeleteShader(sh);
2887		glShaderSource(sh, 1, &glsl_b, NULL);
2888		glCompileShader(sh);
2889
2890		glProgramParameteri(m_program_ab, GL_PROGRAM_SEPARABLE, GL_TRUE);
2891		glLinkProgram(m_program_ab);
2892
2893		m_program_c = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_c);
2894
2895		glGenVertexArrays(1, &m_vao);
2896		glGenProgramPipelines(1, &m_pipeline);
2897		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_ab);
2898		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_c);
2899
2900		glGenBuffers(1, &m_storage_buffer);
2901		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer);
2902		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec2) * 4, NULL, GL_STREAM_DRAW);
2903
2904		glClear(GL_COLOR_BUFFER_BIT);
2905		glBindProgramPipeline(m_pipeline);
2906		glDispatchCompute(1, 1, 1);
2907		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
2908		glBindVertexArray(m_vao);
2909		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
2910
2911		if (getWindowWidth() < 500 &&
2912			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
2913		{
2914			return ERROR;
2915		}
2916		return NO_ERROR;
2917	}
2918	virtual long Cleanup()
2919	{
2920		glDeleteProgram(m_program_ab);
2921		glDeleteProgram(m_program_c);
2922		glDeleteProgramPipelines(1, &m_pipeline);
2923		glDeleteBuffers(1, &m_storage_buffer);
2924		glDeleteVertexArrays(1, &m_vao);
2925		return NO_ERROR;
2926	}
2927};
2928
2929class BasicSSOCase3 : public ComputeShaderBase
2930{
2931	virtual std::string Title()
2932	{
2933		return NL "Separable CS Programs - Compute stage";
2934	}
2935	virtual std::string Purpose()
2936	{
2937		return NL "Verify that compute shader stage selected with UseProgram command has precedence" NL
2938				  "over compute shader stage selected with BindProgramPipeline command.";
2939	}
2940	virtual std::string Method()
2941	{
2942		return NL "1. Create CS0 with CreateProgram command. Create CS1 with CreateShaderProgramv command." NL
2943				  "2. Verify that CS program selected with UseProgram is dispatched even if there is active" NL
2944				  "    compute stage bound by BindProgramPipeline.";
2945	}
2946	virtual std::string PassCriteria()
2947	{
2948		return NL "Everything works as expected.";
2949	}
2950
2951	GLuint m_program_a;
2952	GLuint m_program_b;
2953	GLuint m_pipeline;
2954	GLuint m_storage_buffer;
2955
2956	virtual long Setup()
2957	{
2958		m_program_a		 = 0;
2959		m_program_b		 = 0;
2960		m_pipeline		 = 0;
2961		m_storage_buffer = 0;
2962		return NO_ERROR;
2963	}
2964	virtual long Run()
2965	{
2966		const char* const glsl_a =
2967			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
2968			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 1;" NL "}";
2969		const char* const glsl_b =
2970			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
2971			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 2;" NL "}";
2972		/* create program A */
2973		{
2974			m_program_a = glCreateProgram();
2975			GLuint sh   = glCreateShader(GL_COMPUTE_SHADER);
2976			glAttachShader(m_program_a, sh);
2977			glDeleteShader(sh);
2978			glShaderSource(sh, 1, &glsl_a, NULL);
2979			glCompileShader(sh);
2980			glProgramParameteri(m_program_a, GL_PROGRAM_SEPARABLE, GL_TRUE);
2981			glLinkProgram(m_program_a);
2982		}
2983		m_program_b = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_b);
2984
2985		/* create storage buffer */
2986		{
2987			int data = 0;
2988			glGenBuffers(1, &m_storage_buffer);
2989			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer);
2990			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &data, GL_STREAM_READ);
2991		}
2992
2993		glGenProgramPipelines(1, &m_pipeline);
2994		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_b);
2995
2996		glUseProgram(m_program_a);
2997		glBindProgramPipeline(m_pipeline);
2998		glDispatchCompute(1, 1, 1);
2999		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3000
3001		long error = NO_ERROR;
3002		{
3003			int* data;
3004			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3005			if (data[0] != 1)
3006			{
3007				m_context.getTestContext().getLog()
3008					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
3009				error = ERROR;
3010			}
3011			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3012		}
3013
3014		glUseProgram(0);
3015		glDispatchCompute(1, 1, 1);
3016		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3017
3018		{
3019			int* data;
3020			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3021			if (data[0] != 2)
3022			{
3023				m_context.getTestContext().getLog()
3024					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 2." << tcu::TestLog::EndMessage;
3025				error = ERROR;
3026			}
3027			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3028		}
3029
3030		glUseProgram(m_program_b);
3031		glDispatchCompute(1, 1, 1);
3032		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3033
3034		{
3035			int* data;
3036			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3037			if (data[0] != 2)
3038			{
3039				m_context.getTestContext().getLog()
3040					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 2." << tcu::TestLog::EndMessage;
3041				error = ERROR;
3042			}
3043			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3044		}
3045
3046		glUseProgram(0);
3047		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_a);
3048		glDispatchCompute(1, 1, 1);
3049		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3050
3051		{
3052			int* data;
3053			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3054			if (data[0] != 1)
3055			{
3056				m_context.getTestContext().getLog()
3057					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
3058				error = ERROR;
3059			}
3060			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3061		}
3062
3063		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3064		return error;
3065	}
3066	virtual long Cleanup()
3067	{
3068		glDeleteProgram(m_program_a);
3069		glDeleteProgram(m_program_b);
3070		glDeleteProgramPipelines(1, &m_pipeline);
3071		glDeleteBuffers(1, &m_storage_buffer);
3072		return NO_ERROR;
3073	}
3074};
3075
3076class BasicAtomicCase1 : public ComputeShaderBase
3077{
3078	virtual std::string Title()
3079	{
3080		return NL "Atomic functions";
3081	}
3082	virtual std::string Purpose()
3083	{
3084		return NL "1. Verify that atomicAdd function works as expected with int and uint parameters." NL
3085				  "2. Verify that shared memory can be used with atomic functions." NL
3086				  "3. Verify that groupMemoryBarrier() and barrier() built-in functions work as expected.";
3087	}
3088	virtual std::string Method()
3089	{
3090		return NL "1. Use shared memory as a 'counter' with-in one CS work group." NL
3091				  "2. Each shader invocation increments/decrements 'counter' value using atomicAdd function." NL
3092				  "3. Values returned by atomicAdd function are written to SSBO." NL
3093				  "4. Verify SSBO content (values from 0 to 7 should be written).";
3094	}
3095	virtual std::string PassCriteria()
3096	{
3097		return NL "Everything works as expected.";
3098	}
3099
3100	GLuint m_program;
3101	GLuint m_storage_buffer;
3102
3103	virtual long Setup()
3104	{
3105		m_program		 = 0;
3106		m_storage_buffer = 0;
3107		return NO_ERROR;
3108	}
3109	virtual long Run()
3110	{
3111		const char* const glsl_cs =
3112			NL "layout(local_size_x = 8) in;" NL "layout(std430, binding = 0) buffer Output {" NL
3113			   "  uint g_add_output[8];" NL "  int g_sub_output[8];" NL "};" NL "shared uint g_add_value;" NL
3114			   "shared int g_sub_value;" NL "void main() {" NL "  if (gl_LocalInvocationIndex == 0u) {" NL
3115			   "    g_add_value = 0u;" NL "    g_sub_value = 7;" NL "  }" NL
3116			   "  g_add_output[gl_LocalInvocationIndex] = 0u;" NL "  g_sub_output[gl_LocalInvocationIndex] = 0;" NL
3117			   "  groupMemoryBarrier();" NL "  barrier();" NL
3118			   "  g_add_output[gl_LocalInvocationIndex] = atomicAdd(g_add_value, 1u);" NL
3119			   "  g_sub_output[gl_LocalInvocationIndex] = atomicAdd(g_sub_value, -1);" NL "}";
3120		m_program = CreateComputeProgram(glsl_cs);
3121		glLinkProgram(m_program);
3122		if (!CheckProgram(m_program))
3123			return ERROR;
3124
3125		glGenBuffers(1, &m_storage_buffer);
3126		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3127		glBufferData(GL_SHADER_STORAGE_BUFFER, 16 * sizeof(int), NULL, GL_STATIC_DRAW);
3128
3129		glUseProgram(m_program);
3130		glDispatchCompute(1, 1, 1);
3131		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3132
3133		int* data;
3134		data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 8, GL_MAP_READ_BIT));
3135		std::sort(data, data + 8);
3136		long error = NO_ERROR;
3137		for (int i = 0; i < 8; ++i)
3138		{
3139			if (data[i] != i)
3140			{
3141				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3142													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
3143				error = ERROR;
3144			}
3145		}
3146		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3147
3148		data = static_cast<int*>(
3149			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 8, sizeof(int) * 8, GL_MAP_READ_BIT));
3150		std::sort(data, data + 8);
3151		for (int i = 0; i < 8; ++i)
3152		{
3153			if (data[i] != i)
3154			{
3155				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3156													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
3157				error = ERROR;
3158			}
3159		}
3160		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3161		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3162		return error;
3163	}
3164	virtual long Cleanup()
3165	{
3166		glUseProgram(0);
3167		glDeleteProgram(m_program);
3168		glDeleteBuffers(1, &m_storage_buffer);
3169		return NO_ERROR;
3170	}
3171};
3172
3173class BasicAtomicCase2 : public ComputeShaderBase
3174{
3175	virtual std::string Title()
3176	{
3177		return NL "Atomic functions - buffer variables";
3178	}
3179	virtual std::string Purpose()
3180	{
3181		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3182				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with buffer variables." NL
3183				  "2. Verify that atomic functions work with parameters being constants and" NL
3184				  "    with parameters being uniforms." NL
3185				  "3. Verify that barrier() built-in function can be used in a control flow.";
3186	}
3187	virtual std::string Method()
3188	{
3189		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3190				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3191				  "3. Verify SSBO content." NL
3192				  "4. Repeat for different number of work groups and different work group sizes.";
3193	}
3194	virtual std::string PassCriteria()
3195	{
3196		return NL "Everything works as expected.";
3197	}
3198
3199	GLuint m_program;
3200	GLuint m_storage_buffer[2];
3201	GLuint m_dispatch_buffer;
3202
3203	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
3204	{
3205		const uvec3		  global_size = local_size * num_groups;
3206		std::stringstream ss;
3207		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3208		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
3209		   << ", " << global_size.y() << ", " << global_size.z()
3210		   << ");" NL "layout(std430, binding = 0) buffer OutputU {" NL "  uint g_uint_out["
3211		   << global_size.x() * global_size.y() * global_size.z()
3212		   << "];" NL "};" NL "layout(std430, binding = 1) buffer OutputI {" NL "  int data["
3213		   << global_size.x() * global_size.y() * global_size.z()
3214		   << "];" NL "} g_int_out;" NL "uniform uint g_uint_value[8];" NL "void main() {" NL
3215			  "  uint global_index = gl_GlobalInvocationID.x +" NL
3216			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
3217			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
3218			  "  atomicExchange(g_uint_out[global_index], g_uint_value[0]);" NL
3219			  "  atomicMin(g_uint_out[global_index], g_uint_value[1]);" NL
3220			  "  atomicMax(g_uint_out[global_index], g_uint_value[2]);" NL
3221			  "  atomicAnd(g_uint_out[global_index], g_uint_value[3]);" NL
3222			  "  atomicOr(g_uint_out[global_index], g_uint_value[4]);" NL "  if (g_uint_value[0] > 0u) {" NL
3223			  "    barrier();" // not needed here, just check if compiler accepts it in a control flow
3224			NL "    atomicXor(g_uint_out[global_index], g_uint_value[5]);" NL "  }" NL
3225			  "  atomicCompSwap(g_uint_out[global_index], g_uint_value[6], g_uint_value[7]);" NL NL
3226			  "  atomicExchange(g_int_out.data[global_index], 3);" NL "  atomicMin(g_int_out.data[global_index], 1);" NL
3227			  "  atomicMax(g_int_out.data[global_index], 2);" NL "  atomicAnd(g_int_out.data[global_index], 0x1);" NL
3228			  "  atomicOr(g_int_out.data[global_index], 0x3);" NL "  atomicXor(g_int_out.data[global_index], 0x1);" NL
3229			  "  atomicCompSwap(g_int_out.data[global_index], 0x2, 0x7);" NL "}";
3230		return ss.str();
3231	}
3232	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
3233	{
3234		if (m_program != 0)
3235			glDeleteProgram(m_program);
3236		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
3237		glLinkProgram(m_program);
3238		if (!CheckProgram(m_program))
3239			return false;
3240
3241		const GLuint kBufferSize =
3242			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
3243
3244		if (m_storage_buffer[0] == 0)
3245			glGenBuffers(2, m_storage_buffer);
3246		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
3247		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3248		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
3249		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3250
3251		glUseProgram(m_program);
3252		GLuint values[8] = { 3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u };
3253		glUniform1uiv(glGetUniformLocation(m_program, "g_uint_value"), 8, values);
3254		if (dispatch_indirect)
3255		{
3256			if (m_dispatch_buffer == 0)
3257				glGenBuffers(1, &m_dispatch_buffer);
3258			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3259			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3260			glDispatchComputeIndirect(0);
3261		}
3262		else
3263		{
3264			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
3265		}
3266		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3267
3268		bool	res = true;
3269		GLuint* udata;
3270		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
3271		udata = static_cast<GLuint*>(
3272			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
3273		for (GLuint i = 0; i < kBufferSize; ++i)
3274		{
3275			if (udata[i] != 7)
3276			{
3277				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3278													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3279				res = false;
3280			}
3281		}
3282		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3283		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3284
3285		GLint* idata;
3286		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
3287		idata = static_cast<GLint*>(
3288			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * kBufferSize, GL_MAP_READ_BIT));
3289		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3290		{
3291			if (idata[i] != 7)
3292			{
3293				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
3294													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3295				res = false;
3296			}
3297		}
3298		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3299		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3300		return res;
3301	}
3302	virtual long Setup()
3303	{
3304		m_program			= 0;
3305		m_storage_buffer[0] = m_storage_buffer[1] = 0;
3306		m_dispatch_buffer						  = 0;
3307		return NO_ERROR;
3308	}
3309	virtual long Run()
3310	{
3311		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
3312			return ERROR;
3313		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
3314			return ERROR;
3315		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
3316			return ERROR;
3317		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
3318			return ERROR;
3319		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
3320			return ERROR;
3321		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
3322			return ERROR;
3323		return NO_ERROR;
3324	}
3325	virtual long Cleanup()
3326	{
3327		glUseProgram(0);
3328		glDeleteProgram(m_program);
3329		glDeleteBuffers(2, m_storage_buffer);
3330		glDeleteBuffers(1, &m_dispatch_buffer);
3331		return NO_ERROR;
3332	}
3333};
3334
3335class BasicAtomicCase3 : public ComputeShaderBase
3336{
3337	virtual std::string Title()
3338	{
3339		return NL "Atomic functions - shared variables";
3340	}
3341	virtual std::string Purpose()
3342	{
3343		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3344				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with shared variables." NL
3345				  "2. Verify that atomic functions work with parameters being constants and" NL
3346				  "    with parameters being uniforms." NL
3347				  "3. Verify that atomic functions can be used in a control flow.";
3348	}
3349	virtual std::string Method()
3350	{
3351		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3352				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3353				  "3. Verify SSBO content." NL
3354				  "4. Repeat for different number of work groups and different work group sizes.";
3355	}
3356	virtual std::string PassCriteria()
3357	{
3358		return NL "Everything works as expected.";
3359	}
3360
3361	GLuint m_program;
3362	GLuint m_storage_buffer;
3363	GLuint m_dispatch_buffer;
3364
3365	std::string GenSource(const uvec3& local_size)
3366	{
3367		std::stringstream ss;
3368		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3369		   << ", local_size_z = " << local_size.z()
3370		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint g_uint_out["
3371		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "  int g_int_out["
3372		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "};" NL "shared uint g_shared_uint["
3373		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "shared int g_shared_int["
3374		   << local_size.x() * local_size.y() * local_size.z()
3375		   << "];" NL "uniform uint g_uint_value[8];" NL "void main() {" NL
3376			  "  atomicExchange(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[0]);" NL
3377			  "  atomicMin(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[1]);" NL
3378			  "  atomicMax(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[2]);" NL
3379			  "  atomicAnd(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[3]);" NL
3380			  "  atomicOr(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[4]);" NL
3381			  "  atomicXor(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[5]);" NL
3382			  "  atomicCompSwap(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[6], g_uint_value[7]);" NL NL
3383			  "  atomicExchange(g_shared_int[gl_LocalInvocationIndex], 3);" NL
3384			  "  atomicMin(g_shared_int[gl_LocalInvocationIndex], 1);" NL
3385			  "  atomicMax(g_shared_int[gl_LocalInvocationIndex], 2);" NL
3386			  "  atomicAnd(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL "  if (g_uint_value[1] > 0u) {" NL
3387			  "    atomicOr(g_shared_int[gl_LocalInvocationIndex], 0x3);" NL
3388			  "    atomicXor(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL
3389			  "    atomicCompSwap(g_shared_int[gl_LocalInvocationIndex], 0x2, 0x7);" NL "  }" NL NL
3390			  "  g_uint_out[gl_LocalInvocationIndex] = g_shared_uint[gl_LocalInvocationIndex];" NL
3391			  "  g_int_out[gl_LocalInvocationIndex] = g_shared_int[gl_LocalInvocationIndex];" NL "}";
3392		return ss.str();
3393	}
3394	bool RunIteration(const uvec3& local_size, bool dispatch_indirect)
3395	{
3396		if (m_program != 0)
3397			glDeleteProgram(m_program);
3398		m_program = CreateComputeProgram(GenSource(local_size));
3399		glLinkProgram(m_program);
3400		if (!CheckProgram(m_program))
3401			return false;
3402
3403		const GLuint kBufferSize = local_size.x() * local_size.y() * local_size.z();
3404
3405		if (m_storage_buffer == 0)
3406			glGenBuffers(1, &m_storage_buffer);
3407		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3408		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize * 2, NULL, GL_DYNAMIC_DRAW);
3409
3410		glUseProgram(m_program);
3411		GLuint values[8] = { 3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u };
3412		glUniform1uiv(glGetUniformLocation(m_program, "g_uint_value"), 8, values);
3413		if (dispatch_indirect)
3414		{
3415			const GLuint num_groups[3] = { 1, 1, 1 };
3416			if (m_dispatch_buffer == 0)
3417				glGenBuffers(1, &m_dispatch_buffer);
3418			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3419			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3420			glDispatchComputeIndirect(0);
3421		}
3422		else
3423		{
3424			glDispatchCompute(1, 1, 1);
3425		}
3426		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3427
3428		bool	ret = true;
3429		GLuint* udata;
3430		udata = static_cast<GLuint*>(
3431			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
3432		for (GLuint i = 0; i < kBufferSize; ++i)
3433		{
3434			if (udata[i] != 7)
3435			{
3436				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3437													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3438				ret = false;
3439			}
3440		}
3441		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3442
3443		GLint* idata;
3444		idata = static_cast<GLint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize,
3445													 sizeof(GLint) * kBufferSize, GL_MAP_READ_BIT));
3446		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3447		{
3448			if (idata[i] != 7)
3449			{
3450				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
3451													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3452				ret = false;
3453			}
3454		}
3455		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3456		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3457
3458		return ret;
3459	}
3460	virtual long Setup()
3461	{
3462		m_program		  = 0;
3463		m_storage_buffer  = 0;
3464		m_dispatch_buffer = 0;
3465		return NO_ERROR;
3466	}
3467	virtual long Run()
3468	{
3469		if (!RunIteration(uvec3(64, 1, 1), false))
3470			return ERROR;
3471		if (!RunIteration(uvec3(1, 1, 64), true))
3472			return ERROR;
3473		if (!RunIteration(uvec3(1, 1, 4), false))
3474			return ERROR;
3475		if (!RunIteration(uvec3(3, 2, 1), true))
3476			return ERROR;
3477		if (!RunIteration(uvec3(2, 4, 2), false))
3478			return ERROR;
3479		if (!RunIteration(uvec3(2, 4, 7), true))
3480			return ERROR;
3481		return NO_ERROR;
3482	}
3483	virtual long Cleanup()
3484	{
3485		glUseProgram(0);
3486		glDeleteProgram(m_program);
3487		glDeleteBuffers(1, &m_storage_buffer);
3488		glDeleteBuffers(1, &m_dispatch_buffer);
3489		return NO_ERROR;
3490	}
3491};
3492
3493class AdvancedCopyImage : public ComputeShaderBase
3494{
3495	virtual std::string Title()
3496	{
3497		return NL "Copy Image";
3498	}
3499	virtual std::string Purpose()
3500	{
3501		return NL "Verify that copying two textures using CS works as expected.";
3502	}
3503	virtual std::string Method()
3504	{
3505		return NL "Use shader image load and store operations to copy two textures in the CS.";
3506	}
3507	virtual std::string PassCriteria()
3508	{
3509		return NL "Everything works as expected.";
3510	}
3511
3512	GLuint m_program;
3513	GLuint m_texture[2];
3514	GLuint m_fbo;
3515
3516	virtual long Setup()
3517	{
3518		m_program = 0;
3519		m_fbo	 = 0;
3520		memset(m_texture, 0, sizeof(m_texture));
3521		return NO_ERROR;
3522	}
3523	virtual long Run()
3524	{
3525		const char* const glsl_cs =
3526			NL "#define TILE_WIDTH 8" NL "#define TILE_HEIGHT 8" NL
3527			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
3528			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
3529			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
3530			   "layout(local_size_x=TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL				 NL "void main() {" NL
3531			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
3532			   "  ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL NL
3533			   "  vec4 pixel = imageLoad(g_input_image, pixel_xy);" NL
3534			   "  imageStore(g_output_image, pixel_xy, pixel);" NL "}";
3535		m_program = CreateComputeProgram(glsl_cs);
3536		glLinkProgram(m_program);
3537		if (!CheckProgram(m_program))
3538			return ERROR;
3539
3540		std::vector<GLubyte> in_image(64 * 64 * 4, 0x0f);
3541		std::vector<GLubyte> out_image(64 * 64 * 4, 0xff);
3542
3543		glGenTextures(2, m_texture);
3544		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
3545		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3546		glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 64, 64);
3547		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &in_image[0]);
3548
3549		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
3550		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3551		glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 64, 64);
3552		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &out_image[0]);
3553
3554		glUseProgram(m_program);
3555		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
3556		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
3557		glDispatchCompute(9, 8,
3558						  1); // 9 is on purpose, to ensure that out of bounds image load and stores have no effect
3559		glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
3560
3561		std::vector<GLubyte> data(64 * 64 * 4);
3562		glGenFramebuffers(1, &m_fbo);
3563		glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
3564		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture[1], 0);
3565		glReadPixels(0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
3566		for (std::size_t i = 0; i < data.size(); ++i)
3567		{
3568			if (data[i] != 0x0f)
3569			{
3570				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3571													<< data[i] << " should be " << 0x0f << tcu::TestLog::EndMessage;
3572				return ERROR;
3573			}
3574		}
3575
3576		return NO_ERROR;
3577	}
3578	virtual long Cleanup()
3579	{
3580		glUseProgram(0);
3581		glDeleteProgram(m_program);
3582		glDeleteFramebuffers(1, &m_fbo);
3583		glDeleteTextures(2, m_texture);
3584		return NO_ERROR;
3585	}
3586};
3587
3588class AdvancedPipelinePreVS : public ComputeShaderBase
3589{
3590	virtual std::string Title()
3591	{
3592		return NL "CS as an additional pipeline stage - Before VS (1)";
3593	}
3594	virtual std::string Purpose()
3595	{
3596		return NL "Verify that CS which runs just before VS and modifies VBO content works as expected.";
3597	}
3598	virtual std::string Method()
3599	{
3600		return NL "1. Prepare VBO and VAO for a drawing operation." NL "2. Run CS to modify existing VBO content." NL
3601				  "3. Issue MemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT) command." NL
3602				  "4. Issue draw call command." NL "5. Verify that the framebuffer content is as expected.";
3603	}
3604	virtual std::string PassCriteria()
3605	{
3606		return NL "Everything works as expected.";
3607	}
3608
3609	GLuint m_program[2];
3610	GLuint m_vertex_buffer;
3611	GLuint m_vertex_array;
3612
3613	virtual long Setup()
3614	{
3615		memset(m_program, 0, sizeof(m_program));
3616		m_vertex_buffer = 0;
3617		m_vertex_array  = 0;
3618		return NO_ERROR;
3619	}
3620	virtual long Run()
3621	{
3622		const char* const glsl_cs = NL "layout(local_size_x = 4) in;" NL "struct Vertex {" NL "  vec4 position;" NL
3623									   "  vec4 color;" NL "};" NL "layout(binding = 0, std430) buffer VertexBuffer {" NL
3624									   "  Vertex g_vertex[];" NL "};" NL "uniform float g_scale;" NL "void main() {" NL
3625									   "  g_vertex[gl_GlobalInvocationID.x].position.xyz *= g_scale;" NL
3626									   "  g_vertex[gl_GlobalInvocationID.x].color *= vec4(0.0, 1.0, 0.0, 1.0);" NL "}";
3627		m_program[0] = CreateComputeProgram(glsl_cs);
3628		glLinkProgram(m_program[0]);
3629		glUseProgram(m_program[0]);
3630		glUniform1f(glGetUniformLocation(m_program[0], "g_scale"), 0.8f);
3631		glUseProgram(0);
3632		if (!CheckProgram(m_program[0]))
3633			return ERROR;
3634
3635		const char* const glsl_vs =
3636			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec4 g_color;" NL
3637			   "flat out mediump vec4 color;" NL "void main() {" NL "  gl_Position = g_position;" NL
3638			   "  color = g_color;" NL "}";
3639		const char* const glsl_fs =
3640			NL "flat in mediump vec4 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
3641			   "  g_color = color;" NL "}";
3642		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
3643		glLinkProgram(m_program[1]);
3644		if (!CheckProgram(m_program[1]))
3645			return ERROR;
3646
3647		/* vertex buffer */
3648		{
3649			const float data[] = { -1, -1, 0, 1, 1, 1, 1, 1, 1, -1, 0, 1, 1, 1, 1, 1,
3650								   -1, 1,  0, 1, 1, 1, 1, 1, 1, 1,  0, 1, 1, 1, 1, 1 };
3651			glGenBuffers(1, &m_vertex_buffer);
3652			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3653			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
3654			glBindBuffer(GL_ARRAY_BUFFER, 0);
3655		}
3656
3657		glGenVertexArrays(1, &m_vertex_array);
3658		glBindVertexArray(m_vertex_array);
3659		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3660		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
3661		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
3662		glBindBuffer(GL_ARRAY_BUFFER, 0);
3663		glEnableVertexAttribArray(0);
3664		glEnableVertexAttribArray(1);
3665		glBindVertexArray(0);
3666
3667		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_vertex_buffer);
3668		glUseProgram(m_program[0]);
3669		glDispatchCompute(1, 1, 1);
3670
3671		glClear(GL_COLOR_BUFFER_BIT);
3672		glUseProgram(m_program[1]);
3673		glBindVertexArray(m_vertex_array);
3674		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
3675		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
3676
3677		if (getWindowWidth() < 500 &&
3678			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
3679		{
3680			return ERROR;
3681		}
3682		return NO_ERROR;
3683	}
3684	virtual long Cleanup()
3685	{
3686		glUseProgram(0);
3687		for (int i = 0; i < 2; ++i)
3688			glDeleteProgram(m_program[i]);
3689		glDeleteBuffers(1, &m_vertex_buffer);
3690		glDeleteVertexArrays(1, &m_vertex_array);
3691		return NO_ERROR;
3692	}
3693};
3694
3695class AdvancedPipelineGenDrawCommands : public ComputeShaderBase
3696{
3697	virtual std::string Title()
3698	{
3699		return NL "CS as an additional pipeline stage - Before VS (2)";
3700	}
3701	virtual std::string Purpose()
3702	{
3703		return NL "Verify that a complex scenario where CS is used to generate drawing commands" NL
3704				  "and write them to a draw indirect buffer works as expected. This is a practial usage of CS." NL
3705				  "CS is used for culling objects which are outside of the viewing frustum.";
3706	}
3707	virtual std::string Method()
3708	{
3709		return NL "1. Run CS which will generate four sets of draw call parameters and write them to the draw indirect "
3710				  "buffer." NL "2. One set of draw call parameters will be: 0, 0, 0, 0" NL
3711				  "    (which means that an object is outside of the viewing frustum and should not be drawn)." NL
3712				  "3. Issue MemoryBarrier(GL_COMMAND_BARRIER_BIT) command." NL
3713				  "4. Issue four draw indirect commands." NL "5. Verify that the framebuffer content is as expected.";
3714	}
3715	virtual std::string PassCriteria()
3716	{
3717		return NL "Everything works as expected.";
3718	}
3719
3720	GLuint m_program[2];
3721	GLuint m_vertex_buffer;
3722	GLuint m_index_buffer;
3723	GLuint m_vertex_array;
3724	GLuint m_draw_buffer;
3725	GLuint m_object_buffer;
3726
3727	virtual long Setup()
3728	{
3729		memset(m_program, 0, sizeof(m_program));
3730		m_vertex_buffer = 0;
3731		m_index_buffer  = 0;
3732		m_vertex_array  = 0;
3733		m_draw_buffer   = 0;
3734		m_object_buffer = 0;
3735		return NO_ERROR;
3736	}
3737	virtual long Run()
3738	{
3739		GLint res;
3740		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
3741		if (res <= 0)
3742		{
3743			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
3744			return NOT_SUPPORTED;
3745		}
3746
3747		const char* const glsl_cs =
3748			NL "layout(local_size_x = 4) in;" NL "struct DrawCommand {" NL "  uint count;" NL
3749			   "  uint instance_count;" NL "  uint first_index;" NL "  int base_vertex;" NL "  uint base_instance;" NL
3750			   "};" NL "layout(std430) buffer;" NL "layout(binding = 0) readonly buffer ObjectBuffer {" NL
3751			   "  mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
3752			   "layout(binding = 1) writeonly buffer DrawCommandBuffer {" NL "  DrawCommand g_command[4];" NL "};" NL
3753			   "bool IsObjectVisible(uint id) {" NL
3754			   "  if (g_objects.transform[id][3].x < -1.0 || g_objects.transform[id][3].x > 1.0) return false;" NL
3755			   "  if (g_objects.transform[id][3][1] < -1.0 || g_objects.transform[id][3][1] > 1.0) return false;" NL
3756			   "  if (g_objects.transform[id][3][2] < -1.0 || g_objects.transform[id][3].z > 1.0) return false;" NL
3757			   "  return true;" NL "}" NL "void main() {" NL "  uint id = gl_GlobalInvocationID.x;" NL
3758			   "  g_command[id].count = 0u;" NL "  g_command[id].instance_count = 0u;" NL
3759			   "  g_command[id].first_index = 0u;" NL "  g_command[id].base_vertex = int(0);" NL
3760			   "  g_command[id].base_instance = 0u;" NL "  if (IsObjectVisible(id)) {" NL
3761			   "    g_command[id].count = g_objects.count[id];" NL "    g_command[id].instance_count = 1u;" NL
3762			   "    g_command[id].first_index = g_objects.first_index[id];" NL "  }" NL "}";
3763		m_program[0] = CreateComputeProgram(glsl_cs);
3764		glLinkProgram(m_program[0]);
3765		if (!CheckProgram(m_program[0]))
3766			return ERROR;
3767
3768		const char* const glsl_vs =
3769			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec3 g_color;" NL
3770			   "flat out mediump vec3 color;" NL "layout(binding = 0, std430) buffer ObjectBuffer {" NL
3771			   "  mediump mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
3772			   "uniform int g_object_id;" NL "void main() {" NL
3773			   "  gl_Position = g_objects.transform[g_object_id] * g_position;" NL "  color = g_color;" NL "}";
3774		const char* const glsl_fs =
3775			NL "flat in mediump vec3 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
3776			   "  g_color = vec4(color, 1.0);" NL "}";
3777		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
3778		glLinkProgram(m_program[1]);
3779		if (!CheckProgram(m_program[1]))
3780			return ERROR;
3781		glViewport(0, 0, 100, 100);
3782
3783		/* object buffer */
3784		{
3785			struct
3786			{
3787				mat4   transform[4];
3788				GLuint count[4];
3789				GLuint first_index[4];
3790			} data = {
3791				{ tcu::translationMatrix(vec3(-1.5f, -0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, -0.5f, 0.0f)),
3792				  tcu::translationMatrix(vec3(-0.5f, 0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, 0.5f, 0.0f)) },
3793				{ 4, 4, 4, 4 },
3794				{ 0, 4, 8, 12 }
3795			};
3796			glGenBuffers(1, &m_object_buffer);
3797			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_object_buffer);
3798			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
3799		}
3800		/* vertex buffer */
3801		{
3802			const vec3 data[] = { vec3(-0.4f, -0.4f, 0.0f), vec3(1, 0, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 0, 0),
3803								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 0, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 0, 0),
3804								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(0, 1, 0),
3805								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 1, 0),
3806								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 0, 1), vec3(0.4f, -0.4f, 0.0f), vec3(0, 0, 1),
3807								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 0, 1), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 0, 1),
3808								  vec3(-0.4f, -0.4f, 0.0f), vec3(1, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 1, 0),
3809								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 1, 0) };
3810			glGenBuffers(1, &m_vertex_buffer);
3811			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3812			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
3813			glBindBuffer(GL_ARRAY_BUFFER, 0);
3814		}
3815		/* index buffer */
3816		{
3817			const GLushort data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
3818			glGenBuffers(1, &m_index_buffer);
3819			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
3820			glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW);
3821			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
3822		}
3823		glGenBuffers(1, &m_draw_buffer);
3824		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
3825		glBufferData(GL_DRAW_INDIRECT_BUFFER, 4 * sizeof(GLuint) * 5, NULL, GL_DYNAMIC_DRAW);
3826		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
3827
3828		glGenVertexArrays(1, &m_vertex_array);
3829		glBindVertexArray(m_vertex_array);
3830		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3831		glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), 0);
3832		glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), reinterpret_cast<void*>(sizeof(vec3)));
3833		glBindBuffer(GL_ARRAY_BUFFER, 0);
3834		glEnableVertexAttribArray(0);
3835		glEnableVertexAttribArray(1);
3836		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
3837		glBindVertexArray(0);
3838
3839		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_draw_buffer);
3840		glUseProgram(m_program[0]);
3841		glDispatchCompute(1, 1, 1);
3842
3843		glClear(GL_COLOR_BUFFER_BIT);
3844		glUseProgram(m_program[1]);
3845		glBindVertexArray(m_vertex_array);
3846		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
3847		glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
3848		/* draw (CPU draw calls dispatch, could be done by the GPU with ARB_multi_draw_indirect) */
3849		{
3850			GLsizeiptr offset = 0;
3851			for (int i = 0; i < 4; ++i)
3852			{
3853				glUniform1i(glGetUniformLocation(m_program[1], "g_object_id"), i);
3854				glDrawElementsIndirect(GL_TRIANGLE_STRIP, GL_UNSIGNED_SHORT, reinterpret_cast<void*>(offset));
3855				offset += 5 * sizeof(GLuint);
3856			}
3857		}
3858		if (getWindowWidth() >= 100 && getWindowHeight() >= 100 &&
3859			!ValidateWindow4Quads(vec3(0), vec3(0, 1, 0), vec3(1, 1, 0), vec3(0, 0, 1)))
3860		{
3861			return ERROR;
3862		}
3863		return NO_ERROR;
3864	}
3865	virtual long Cleanup()
3866	{
3867		glUseProgram(0);
3868		for (int i = 0; i < 2; ++i)
3869			glDeleteProgram(m_program[i]);
3870		glDeleteBuffers(1, &m_vertex_buffer);
3871		glDeleteBuffers(1, &m_index_buffer);
3872		glDeleteVertexArrays(1, &m_vertex_array);
3873		glDeleteBuffers(1, &m_draw_buffer);
3874		glDeleteBuffers(1, &m_object_buffer);
3875		glViewport(0, 0, getWindowWidth(), getWindowHeight());
3876		return NO_ERROR;
3877	}
3878};
3879
3880class AdvancedPipelineComputeChain : public ComputeShaderBase
3881{
3882	virtual std::string Title()
3883	{
3884		return NL "Compute Chain";
3885	}
3886	virtual std::string Purpose()
3887	{
3888		return NL "1. Verify that dispatching several compute kernels that work in a sequence" NL
3889				  "    with a common set of resources works as expected." NL
3890				  "2. Verify that indexing nested structures with built-in variables work as expected." NL
3891				  "3. Verify that two kernels can write to the same resource without MemoryBarrier" NL
3892				  "    command if target regions of memory do not overlap.";
3893	}
3894	virtual std::string Method()
3895	{
3896		return NL "1. Create a set of GPU resources (buffers, images, atomic counters)." NL
3897				  "2. Dispatch Kernel0 that write to these resources." NL "3. Issue MemoryBarrier command." NL
3898				  "4. Dispatch Kernel1 that read/write from/to these resources." NL "5. Issue MemoryBarrier command." NL
3899				  "6. Dispatch Kernel2 that read/write from/to these resources." NL
3900				  "7. Verify that content of all resources is as expected.";
3901	}
3902	virtual std::string PassCriteria()
3903	{
3904		return NL "Everything works as expected.";
3905	}
3906
3907	GLuint m_program[3];
3908	GLuint m_storage_buffer[4];
3909	GLuint m_counter_buffer;
3910	GLuint m_texture;
3911	GLuint m_fbo;
3912
3913	std::string Common()
3914	{
3915		return NL "precision highp image2D;" NL "struct S0 {" NL "  int m0[8];" NL "};" NL "struct S1 {" NL
3916				  "  S0 m0[8];" NL "};" NL "layout(binding = 0, std430) buffer Buffer0 {" NL "  int m0[5];" NL
3917				  "  S1 m1[8];" NL "} g_buffer0;" NL "layout(binding = 1, std430) buffer Buffer1 {" NL
3918				  "  uint data[8];" NL "} g_buffer1;" NL "layout(binding = 2, std430) buffer Buffer2 {" NL
3919				  "  int data[256];" NL "} g_buffer2;" NL "layout(binding = 3, std430) buffer Buffer3 {" NL
3920				  "  int data[256];" NL "} g_buffer3;" NL "layout(binding = 4, std430) buffer Buffer4 {" NL
3921				  "  mat4 data0;" NL "  mat4 data1;" NL "} g_buffer4;" NL
3922				  "layout(binding = 0, rgba8) writeonly uniform mediump image2D g_image0;" NL
3923				  "layout(binding = 0, offset = 8) uniform atomic_uint g_counter[2];";
3924	}
3925	std::string GenGLSL(int p)
3926	{
3927		std::stringstream ss;
3928		ss << Common();
3929		if (p == 0)
3930		{
3931			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL
3932					 "void UpdateBuffer0(uvec3 id, int add_val) {" NL "  if (id.x < 8u && id.y < 8u && id.z < 8u) {" NL
3933					 "    g_buffer0.m1[id.z].m0[id.y].m0[id.x] += add_val;" NL "  }" NL "}" NL
3934					 "uniform int g_add_value;" NL "uniform uint g_counter_y;" NL "uniform vec4 g_image_value;" NL
3935					 "void main() {" NL "  uvec3 id = gl_GlobalInvocationID;" NL "  UpdateBuffer0(id, 1);" NL
3936					 "  UpdateBuffer0(id, g_add_value);" NL "  if (id == uvec3(1, g_counter_y, 1)) {" NL
3937					 "    uint idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL
3938					 "    idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL "  }" NL
3939					 "  if (id.x < 4u && id.y < 4u && id.z == 0u) {" NL
3940					 "    imageStore(g_image0, ivec2(id.xy), g_image_value);" NL "  }" NL
3941					 "  if (id.x < 2u && id.y == 0u && id.z == 0u) {" NL
3942					 "    g_buffer2.data[id.x] -= int(g_counter_y);" NL "  }" NL "}";
3943		}
3944		else if (p == 1)
3945		{
3946			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in;"
3947				// translation matrix
3948				NL "uniform mat4 g_mvp;" NL "void main() {" NL "  if (gl_GlobalInvocationID == uvec3(0)) {" NL
3949					 "    g_buffer4.data0 *= g_mvp;" NL "  }" NL "  if (gl_WorkGroupID == uvec3(0)) {" NL
3950					 "    g_buffer4.data1[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = "
3951					 "g_mvp[gl_LocalInvocationID.x][gl_LocalInvocationID.y];" NL "  }" NL "}";
3952		}
3953		else if (p == 2)
3954		{
3955			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL "void main() {" NL "}";
3956		}
3957		return ss.str();
3958	}
3959	virtual long Setup()
3960	{
3961		memset(m_program, 0, sizeof(m_program));
3962		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
3963		m_counter_buffer = 0;
3964		m_texture		 = 0;
3965		m_fbo			 = 0;
3966		return NO_ERROR;
3967	}
3968	virtual long Run()
3969	{
3970		using namespace tcu;
3971
3972		for (int i = 0; i < 3; ++i)
3973		{
3974			m_program[i] = CreateComputeProgram(GenGLSL(i));
3975			glLinkProgram(m_program[i]);
3976			if (i == 0)
3977			{
3978				glUseProgram(m_program[i]);
3979				glUniform1i(glGetUniformLocation(m_program[i], "g_add_value"), 1);
3980				glUniform1ui(glGetUniformLocation(m_program[i], "g_counter_y"), 1u);
3981				glUniform4f(glGetUniformLocation(m_program[i], "g_image_value"), 0.25f, 0.5f, 0.75f, 1.0f);
3982				glUseProgram(0);
3983			}
3984			else if (i == 1)
3985			{
3986				glUseProgram(m_program[i]);
3987				GLfloat values[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.0f,  0.0f,  0.0f,
3988									   0.0f, 0.0f, 1.0f, 0.0f, 10.0f, 20.0f, 30.0f, 1.0f };
3989				glUniformMatrix4fv(glGetUniformLocation(m_program[i], "g_mvp"), 1, GL_FALSE, values);
3990				glUseProgram(0);
3991			}
3992			if (!CheckProgram(m_program[i]))
3993				return ERROR;
3994		}
3995
3996		glGenBuffers(4, m_storage_buffer);
3997		/* storage buffer 0 */
3998		{
3999			std::vector<int> data(5 + 8 * 8 * 8);
4000			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
4001			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(int)), &data[0], GL_STATIC_COPY);
4002		}
4003		/* storage buffer 1 */
4004		{
4005			const GLuint data[8] = { 0 };
4006			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
4007			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4008		}
4009		/* storage buffer 2 & 3 */
4010		{
4011			std::vector<GLint> data(512, 7);
4012			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4013			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(GLint)), &data[0], GL_STATIC_COPY);
4014
4015			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_storage_buffer[2], 0,
4016							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4017			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer[2],
4018							  (GLintptr)(sizeof(GLint) * data.size() / 2),
4019							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4020		}
4021		/* storage buffer 4 */
4022		{
4023			std::vector<mat4> data(2);
4024			data[0] = mat4(1);
4025			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_storage_buffer[3]);
4026			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(mat4)), &data[0], GL_STATIC_COPY);
4027		}
4028		/* counter buffer */
4029		{
4030			GLuint data[4] = { 0 };
4031			glGenBuffers(1, &m_counter_buffer);
4032			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer);
4033			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4034		}
4035		/* texture */
4036		{
4037			std::vector<GLint> data(4 * 4 * 4, 0);
4038			glGenTextures(1, &m_texture);
4039			glBindTexture(GL_TEXTURE_2D, m_texture);
4040			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4041			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4042			glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 4, 4);
4043			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4044			glBindTexture(GL_TEXTURE_2D, 0);
4045		}
4046
4047		glUseProgram(m_program[0]);
4048		glBindImageTexture(0, m_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
4049		glDispatchCompute(2, 2, 2);
4050		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4051		glDispatchCompute(3, 2, 2);
4052
4053		glUseProgram(m_program[1]);
4054		glDispatchCompute(4, 3, 7);
4055
4056		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
4057						GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4058
4059		long error = NO_ERROR;
4060		/* validate storage buffer 0 */
4061		{
4062			int* data;
4063			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
4064			data = static_cast<int*>(
4065				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * (5 + 8 * 8 * 8), GL_MAP_READ_BIT));
4066			for (std::size_t i = 5; i < 5 + 8 * 8 * 8; ++i)
4067			{
4068				if (data[i] != 4)
4069				{
4070					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4071														<< " should be: 2." << tcu::TestLog::EndMessage;
4072					error = ERROR;
4073				}
4074			}
4075			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4076			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4077		}
4078		/* validate storage buffer 1 */
4079		{
4080			GLuint* data;
4081			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
4082			data = static_cast<GLuint*>(
4083				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 8, GL_MAP_READ_BIT));
4084			for (GLuint i = 0; i < 4; ++i)
4085			{
4086				if (data[i] != i)
4087				{
4088					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4089														<< " should be: " << i << tcu::TestLog::EndMessage;
4090					error = ERROR;
4091				}
4092			}
4093			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4094			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4095		}
4096		/* validate storage buffer 2 & 3 */
4097		{
4098			GLint* data;
4099			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4100			data = static_cast<GLint*>(
4101				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * 512, GL_MAP_READ_BIT));
4102			for (int i = 0; i < 2; ++i)
4103			{
4104				if (data[i] != 5)
4105				{
4106					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4107														<< " should be: 5." << tcu::TestLog::EndMessage;
4108					error = ERROR;
4109				}
4110				if (data[i + 256] != 7)
4111				{
4112					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i + 256]
4113														<< " should be: 7." << tcu::TestLog::EndMessage;
4114					error = ERROR;
4115				}
4116			}
4117			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4118			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4119		}
4120		/* validate storage buffer 4 */
4121		{
4122			mat4* data;
4123			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[3]);
4124			data = static_cast<mat4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(mat4) * 2, GL_MAP_READ_BIT));
4125			if (transpose(data[1]) != translationMatrix(vec3(10.0f, 20.0f, 30.0f)))
4126			{
4127				m_context.getTestContext().getLog()
4128					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4129				error = ERROR;
4130			}
4131			if (transpose(data[0]) != transpose(translationMatrix(vec3(10.0f, 20.0f, 30.0f))))
4132			{
4133				m_context.getTestContext().getLog()
4134					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4135				error = ERROR;
4136			}
4137			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4138			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4139		}
4140		/* validate counter buffer */
4141		{
4142			GLuint* data;
4143			data = static_cast<GLuint*>(
4144				glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint) * 4, GL_MAP_READ_BIT));
4145			if (data[3] != 4)
4146			{
4147				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[3]
4148													<< " should be: " << 4 << tcu::TestLog::EndMessage;
4149				error = ERROR;
4150			}
4151			glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
4152		}
4153		/* validate texture */
4154		{
4155			std::vector<vec4> data(4 * 4);
4156			glBindTexture(GL_TEXTURE_2D, m_texture);
4157			glGenFramebuffers(1, &m_fbo);
4158			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4159			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
4160			std::vector<GLubyte> colorData(4 * 4 * 4);
4161			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4162			for (int i = 0; i < 4 * 4 * 4; i += 4)
4163			{
4164				data[i / 4] =
4165					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4166						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4167			}
4168			vec4 epsilon = vec4(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f); // texture format is RGBA8.
4169			for (std::size_t i = 0; i < data.size(); ++i)
4170			{
4171				if (!ColorEqual(data[i], vec4(0.25f, 0.5f, 0.75f, 1.0f), epsilon))
4172				{
4173					m_context.getTestContext().getLog()
4174						<< tcu::TestLog::Message << "Invalid data at texture." << tcu::TestLog::EndMessage;
4175					return ERROR;
4176				}
4177			}
4178		}
4179
4180		return error;
4181	}
4182	virtual long Cleanup()
4183	{
4184		glUseProgram(0);
4185		for (int i = 0; i < 3; ++i)
4186			glDeleteProgram(m_program[i]);
4187		glDeleteBuffers(4, m_storage_buffer);
4188		glDeleteBuffers(1, &m_counter_buffer);
4189		glDeleteTextures(1, &m_texture);
4190		glDeleteFramebuffers(1, &m_fbo);
4191		return NO_ERROR;
4192	}
4193};
4194
4195class AdvancedPipelinePostFS : public ComputeShaderBase
4196{
4197	virtual std::string Title()
4198	{
4199		return NL "CS as an additional pipeline stage - After FS";
4200	}
4201	virtual std::string Purpose()
4202	{
4203		return NL "1. Verify that CS which runs just after FS to do a post-processing on a rendered image works as "
4204				  "expected." NL "2. Verify that CS used as a post-processing filter works as expected." NL
4205				  "3. Verify that several CS kernels which run in a sequence to do a post-processing on a rendered "
4206				  "image works as expected.";
4207	}
4208	virtual std::string Method()
4209	{
4210		return NL
4211			"1. Render image to Texture0 using VS and FS." NL
4212			"2. Use Texture0 as an input to Kernel0 which performs post-processing and writes result to Texture1." NL
4213			"3. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4214			"4. Use Texture1 as an input to Kernel1 which performs post-processing and writes result to Texture0." NL
4215			"5. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4216			"6. Verify content of the final post-processed image (Texture0).";
4217	}
4218	virtual std::string PassCriteria()
4219	{
4220		return NL "Everything works as expected.";
4221	}
4222
4223	GLuint m_program[3];
4224	GLuint m_render_target[2];
4225	GLuint m_framebuffer;
4226	GLuint m_vertex_array;
4227	GLuint m_fbo;
4228
4229	virtual long Setup()
4230	{
4231		memset(m_program, 0, sizeof(m_program));
4232		memset(m_render_target, 0, sizeof(m_render_target));
4233		m_framebuffer  = 0;
4234		m_vertex_array = 0;
4235		m_fbo		   = 0;
4236		return NO_ERROR;
4237	}
4238
4239	virtual long Run()
4240	{
4241		const char* const glsl_vs =
4242			NL "const mediump vec2 g_vertex[4] = vec2[4](vec2(0.0), vec2(-1.0, -1.0), vec2(3.0, -1.0), vec2(-1.0, "
4243			   "3.0));" NL "void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], 0.0, 1.0);" NL "}";
4244		const char* const glsl_fs = NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
4245									   "  g_color = vec4(1.0, 0.0, 0.0, 1.0);" NL "}";
4246		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4247		glLinkProgram(m_program[0]);
4248		if (!CheckProgram(m_program[0]))
4249			return ERROR;
4250
4251		const char* const glsl_cs =
4252			NL "#define TILE_WIDTH 4" NL "#define TILE_HEIGHT 4" NL
4253			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4254			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
4255			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
4256			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL				 NL "void main() {" NL
4257			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL NL
4258			   "  if (thread_xy == ivec2(0)) {" NL "    ivec2 pixel_xy = tile_xy * kTileSize;" NL
4259			   "    for (int y = 0; y < TILE_HEIGHT; ++y) {" NL "      for (int x = 0; x < TILE_WIDTH; ++x) {" NL
4260			   "        imageStore(g_output_image, pixel_xy + ivec2(x, y), vec4(0, 1, 0, 1));" NL "      }" NL
4261			   "    }" NL "  }" NL "}";
4262
4263		m_program[1] = CreateComputeProgram(glsl_cs);
4264		glLinkProgram(m_program[1]);
4265		if (!CheckProgram(m_program[1]))
4266			return ERROR;
4267
4268		const char* const glsl_cs2 =
4269			NL "#define TILE_WIDTH 8" NL "#define TILE_HEIGHT 8" NL
4270			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4271			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
4272			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
4273			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL "vec4 Process(vec4 ic) {" NL
4274			   "  return ic + vec4(1.0, 0.0, 0.0, 0.0);" NL "}" NL "void main() {" NL
4275			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4276			   "  ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL
4277			   "  vec4 ic = imageLoad(g_input_image, pixel_xy);" NL
4278			   "  imageStore(g_output_image, pixel_xy, Process(ic));" NL "}";
4279
4280		m_program[2] = CreateComputeProgram(glsl_cs2);
4281		glLinkProgram(m_program[2]);
4282		if (!CheckProgram(m_program[2]))
4283			return ERROR;
4284
4285		glGenVertexArrays(1, &m_vertex_array);
4286
4287		/* init render targets */
4288		{
4289			std::vector<GLint> data(128 * 128 * 4);
4290			glGenTextures(2, m_render_target);
4291			for (int i = 0; i < 2; ++i)
4292			{
4293				glBindTexture(GL_TEXTURE_2D, m_render_target[i]);
4294				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4295				glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 128, 128);
4296				glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 128, 128, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4297			}
4298			glBindTexture(GL_TEXTURE_2D, 0);
4299		}
4300
4301		glGenFramebuffers(1, &m_framebuffer);
4302		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4303		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_render_target[0], 0);
4304		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4305
4306		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4307		glUseProgram(m_program[0]);
4308		glBindVertexArray(m_vertex_array);
4309		glClear(GL_COLOR_BUFFER_BIT);
4310		glViewport(0, 0, 128, 128);
4311		// draw full-viewport triangle
4312		glDrawArrays(GL_TRIANGLES, 1,
4313					 3); // note: <first> is 1 this means that gl_VertexID in the VS will be: 1, 2 and 3
4314		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4315
4316		glBindImageTexture(0, m_render_target[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);  // input
4317		glBindImageTexture(1, m_render_target[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); // output
4318		glUseProgram(m_program[1]);
4319		glDispatchCompute(128 / 4, 128 / 4, 1);
4320
4321		glBindImageTexture(0, m_render_target[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);  // input
4322		glBindImageTexture(1, m_render_target[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); // output
4323		glUseProgram(m_program[2]);
4324		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4325		glDispatchCompute(128 / 8, 128 / 8, 1);
4326
4327		/* validate render target */
4328		{
4329			std::vector<vec4> data(128 * 128);
4330			glBindTexture(GL_TEXTURE_2D, m_render_target[0]);
4331			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4332			glGenFramebuffers(1, &m_fbo);
4333			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4334			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_render_target[0], 0);
4335			std::vector<GLubyte> colorData(128 * 128 * 4);
4336			glReadPixels(0, 0, 128, 128, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4337			for (int i = 0; i < 128 * 128 * 4; i += 4)
4338			{
4339				data[i / 4] =
4340					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4341						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4342			}
4343			for (std::size_t i = 0; i < data.size(); ++i)
4344			{
4345				if (!IsEqual(data[i], vec4(1, 1, 0, 1)))
4346				{
4347					m_context.getTestContext().getLog()
4348						<< tcu::TestLog::Message << "Invalid data at index " << i << ": " << data[i].x() << ", "
4349						<< data[i].y() << ", " << data[i].z() << ", " << data[i].w() << tcu::TestLog::EndMessage;
4350					return ERROR;
4351				}
4352			}
4353		}
4354		return NO_ERROR;
4355	}
4356
4357	virtual long Cleanup()
4358	{
4359		glViewport(0, 0, getWindowWidth(), getWindowHeight());
4360		glUseProgram(0);
4361		for (int i = 0; i < 3; ++i)
4362			glDeleteProgram(m_program[i]);
4363		glDeleteTextures(2, m_render_target);
4364		glDeleteVertexArrays(1, &m_vertex_array);
4365		glDeleteFramebuffers(1, &m_framebuffer);
4366		glDeleteFramebuffers(1, &m_fbo);
4367		return NO_ERROR;
4368	}
4369};
4370
4371class AdvancedPipelinePostXFB : public ComputeShaderBase
4372{
4373	virtual std::string Title()
4374	{
4375		return NL "CS as an additional pipeline stage - After XFB";
4376	}
4377	virtual std::string Purpose()
4378	{
4379		return NL "1. Verify that CS which process data fedback by VS works as expected." NL
4380				  "2. Verify that XFB and SSBO works correctly together in one shader." NL
4381				  "3. Verify that 'switch' statment which selects different execution path for each CS thread works as "
4382				  "expected.";
4383	}
4384	virtual std::string Method()
4385	{
4386		return NL "1. Draw triangle with XFB enabled. Some data is written to the XFB buffer." NL
4387				  "2. Use XFB buffer as 'input SSBO' in CS. Process data and write it to 'output SSBO'." NL
4388				  "3. Verify 'output SSBO' content.";
4389	}
4390	virtual std::string PassCriteria()
4391	{
4392		return NL "Everything works as expected.";
4393	}
4394
4395	GLuint m_program[2];
4396	GLuint m_storage_buffer;
4397	GLuint m_xfb_buffer;
4398	GLuint m_vertex_buffer;
4399	GLuint m_vertex_array;
4400
4401	virtual long Setup()
4402	{
4403		memset(m_program, 0, sizeof(m_program));
4404		m_storage_buffer = 0;
4405		m_xfb_buffer	 = 0;
4406		m_vertex_buffer  = 0;
4407		m_vertex_array   = 0;
4408		return NO_ERROR;
4409	}
4410	virtual long Run()
4411	{
4412		GLint res;
4413		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
4414		if (res <= 0)
4415		{
4416			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
4417			return NOT_SUPPORTED;
4418		}
4419
4420		const char* const glsl_vs =
4421			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec4 g_color;" NL
4422			   "struct Vertex {" NL "  mediump vec4 position;" NL "  mediump vec4 color;" NL "};" NL
4423			   "flat out mediump vec4 color;" NL "layout(binding = 0) buffer StageData {" NL "  Vertex vertex[];" NL
4424			   "} g_vs_buffer;" NL "void main() {" NL "  gl_Position = g_position;" NL "  color = g_color;" NL
4425			   "  g_vs_buffer.vertex[gl_VertexID].position = g_position;" NL
4426			   "  g_vs_buffer.vertex[gl_VertexID].color = g_color;" NL "}";
4427		const char* const glsl_fs =
4428			NL "flat mediump in vec4 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
4429			   "  g_color = color;" NL "}";
4430		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4431		/* setup xfb varyings */
4432		{
4433			const char* const var[2] = { "gl_Position", "color" };
4434			glTransformFeedbackVaryings(m_program[0], 2, var, GL_INTERLEAVED_ATTRIBS);
4435		}
4436		glLinkProgram(m_program[0]);
4437		if (!CheckProgram(m_program[0]))
4438			return ERROR;
4439
4440		const char* const glsl_cs =
4441			NL "layout(local_size_x = 3) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
4442			   "layout(binding = 3, std430) buffer Buffer {" NL "  Vertex g_vertex[3];" NL "};" NL
4443			   "uniform vec4 g_color1;" NL "uniform int g_two;" NL "void UpdateVertex2(int i) {" NL
4444			   "  g_vertex[i].color -= vec4(-1, 1, 0, 0);" NL "}" NL "void main() {" NL
4445			   "  switch (gl_GlobalInvocationID.x) {" NL
4446			   "    case 0u: g_vertex[gl_GlobalInvocationID.x].color += vec4(1, 0, 0, 0); break;" NL
4447			   "    case 1u: g_vertex[1].color += g_color1; break;" NL "    case 2u: UpdateVertex2(g_two); break;" NL
4448			   "    default: return;" NL "  }" NL "}";
4449		m_program[1] = CreateComputeProgram(glsl_cs);
4450		glLinkProgram(m_program[1]);
4451		glUseProgram(m_program[1]);
4452		glUniform4f(glGetUniformLocation(m_program[1], "g_color1"), 0.f, 0.f, 1.f, 0.f);
4453		glUniform1i(glGetUniformLocation(m_program[1], "g_two"), 2);
4454		glUseProgram(0);
4455		if (!CheckProgram(m_program[1]))
4456			return ERROR;
4457
4458		glGenBuffers(1, &m_storage_buffer);
4459		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
4460		glBufferData(GL_SHADER_STORAGE_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STATIC_COPY);
4461
4462		glGenBuffers(1, &m_xfb_buffer);
4463		glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, m_xfb_buffer);
4464		glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STREAM_COPY);
4465
4466		const float in_data[3 * 8] = { -1, -1, 0, 1, 0, 1, 0, 1, 3, -1, 0, 1, 0, 1, 0, 1, -1, 3, 0, 1, 0, 1, 0, 1 };
4467		glGenBuffers(1, &m_vertex_buffer);
4468		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4469		glBufferData(GL_ARRAY_BUFFER, sizeof(in_data), in_data, GL_STATIC_DRAW);
4470		glBindBuffer(GL_ARRAY_BUFFER, 0);
4471
4472		glGenVertexArrays(1, &m_vertex_array);
4473		glBindVertexArray(m_vertex_array);
4474		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4475		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
4476		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
4477		glBindBuffer(GL_ARRAY_BUFFER, 0);
4478		glEnableVertexAttribArray(0);
4479		glEnableVertexAttribArray(1);
4480		glBindVertexArray(0);
4481
4482		glClear(GL_COLOR_BUFFER_BIT);
4483		glUseProgram(m_program[0]);
4484		glBindVertexArray(m_vertex_array);
4485		glBeginTransformFeedback(GL_TRIANGLES);
4486		glDrawArrays(GL_TRIANGLES, 0, 3);
4487		glEndTransformFeedback();
4488
4489		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_xfb_buffer);
4490		glUseProgram(m_program[1]);
4491		glDispatchCompute(1, 1, 1);
4492
4493		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
4494
4495		long error = NO_ERROR;
4496		/* validate storage buffer */
4497		{
4498			float* data;
4499			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
4500			data = static_cast<float*>(
4501				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * 3 * 8, GL_MAP_READ_BIT));
4502			if (memcmp(data, in_data, sizeof(float) * 3 * 8) != 0)
4503			{
4504				m_context.getTestContext().getLog()
4505					<< tcu::TestLog::Message << "Data in shader storage buffer is incorrect."
4506					<< tcu::TestLog::EndMessage;
4507				error = ERROR;
4508			}
4509			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4510			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4511		}
4512		/* validate xfb buffer */
4513		{
4514			const float ref_data[3 * 8] = {
4515				-1, -1, 0, 1, 1, 1, 0, 1, 3, -1, 0, 1, 0, 1, 1, 1, -1, 3, 0, 1, 1, 0, 0, 1
4516			};
4517			float* data;
4518			data = static_cast<float*>(
4519				glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, sizeof(float) * 3 * 8, GL_MAP_READ_BIT));
4520			if (memcmp(data, ref_data, sizeof(float) * 3 * 8) != 0)
4521			{
4522				m_context.getTestContext().getLog()
4523					<< tcu::TestLog::Message << "Data in xfb buffer is incorrect." << tcu::TestLog::EndMessage;
4524				error = ERROR;
4525			}
4526			glUnmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER);
4527			glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
4528		}
4529		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
4530		{
4531			error = ERROR;
4532		}
4533		return error;
4534	}
4535	virtual long Cleanup()
4536	{
4537		glUseProgram(0);
4538		for (int i = 0; i < 2; ++i)
4539			glDeleteProgram(m_program[i]);
4540		glDeleteBuffers(1, &m_vertex_buffer);
4541		glDeleteBuffers(1, &m_storage_buffer);
4542		glDeleteBuffers(1, &m_xfb_buffer);
4543		glDeleteVertexArrays(1, &m_vertex_array);
4544		return NO_ERROR;
4545	}
4546};
4547
4548class AdvancedSharedIndexing : public ComputeShaderBase
4549{
4550	virtual std::string Title()
4551	{
4552		return NL "Shared Memory - Indexing";
4553	}
4554	virtual std::string Purpose()
4555	{
4556		return NL "1. Verify that indexing various types of shared memory works as expected." NL
4557				  "2. Verify that indexing shared memory with different types of expressions work as expected." NL
4558				  "3. Verify that all declaration types of shared structures are supported by the GLSL compiler.";
4559	}
4560	virtual std::string Method()
4561	{
4562		return NL "1. Create CS which uses shared memory in many different ways." NL
4563				  "2. Write to shared memory using different expressions." NL "3. Validate shared memory content." NL
4564				  "4. Use synchronization primitives (barrier, groupMemoryBarrier) where applicable.";
4565	}
4566	virtual std::string PassCriteria()
4567	{
4568		return NL "Everyting works as expected.";
4569	}
4570
4571	GLuint m_program;
4572	GLuint m_texture;
4573	GLuint m_fbo;
4574
4575	virtual long Setup()
4576	{
4577		m_program = 0;
4578		m_texture = 0;
4579		m_fbo	 = 0;
4580		return NO_ERROR;
4581	}
4582	virtual long Run()
4583	{
4584		const char* const glsl_cs =
4585			NL "layout(binding = 3, rgba8) uniform mediump writeonly image2D g_result_image;" NL
4586			   "layout (local_size_x = 4,local_size_y=4 ) in;" NL "shared vec4 g_shared1[4];" NL
4587			   "shared mat4 g_shared2;" NL "shared struct {" NL "  float data[4];" NL "} g_shared3[4];" NL
4588			   "shared struct Type { float data[4]; } g_shared4[4];" NL "shared Type g_shared5[4];" NL
4589			   "uniform bool g_true;" NL "uniform float g_values[16];" NL NL "void Sync() {" NL
4590			   "  groupMemoryBarrier();" NL "  barrier();" NL "}" NL "void SetMemory(ivec2 xy, float value) {" NL
4591			   "  g_shared1[xy.y][gl_LocalInvocationID.x] = value;" NL "  g_shared2[xy.y][xy.x] = value;" NL
4592			   "  g_shared3[xy[1]].data[xy[0]] = value;" NL "  g_shared4[xy.y].data[xy[0]] = value;" NL
4593			   "  g_shared5[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] = value;" NL "}" NL
4594			   "bool CheckMemory(ivec2 xy, float expected) {" NL
4595			   "  if (g_shared1[xy.y][xy[0]] != expected) return false;" NL
4596			   "  if (g_shared2[xy[1]][xy[0]] != expected) return false;" NL
4597			   "  if (g_shared3[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] != expected) return false;" NL
4598			   "  if (g_shared4[gl_LocalInvocationID.y].data[xy.x] != expected) return false;" NL
4599			   "  if (g_shared5[xy.y].data[xy.x] != expected) return false;" NL "  return true;" NL "}" NL
4600			   "void main() {" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4601			   "  vec4 result = vec4(0.0, 1.0, 0.0, 1.0);" NL NL
4602			   "  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0);" NL "  Sync();" NL
4603			   "  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0)) result = vec4(1.0, 0.0, 0.0, "
4604			   "1.0);" NL NL "  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0);" NL "  Sync();" NL
4605			   "  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0)) result = vec4(1.0, 0.0, 0.0, "
4606			   "1.0);" NL NL "  if (g_true && gl_LocalInvocationID.x < 10u) {" NL
4607			   "    SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0);" NL "    Sync();" NL
4608			   "    if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0)) result = vec4(1.0, 0.0, 0.0, "
4609			   "1.0);" NL "  }" NL NL "  imageStore(g_result_image, thread_xy, result);" NL "}";
4610
4611		m_program = CreateComputeProgram(glsl_cs);
4612		glLinkProgram(m_program);
4613		if (!CheckProgram(m_program))
4614			return ERROR;
4615
4616		/* init texture */
4617		{
4618			std::vector<GLint> data(4 * 4 * 4);
4619			glGenTextures(1, &m_texture);
4620			glBindTexture(GL_TEXTURE_2D, m_texture);
4621			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4622			glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 4, 4);
4623			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4624			glBindTexture(GL_TEXTURE_2D, 0);
4625		}
4626
4627		glBindImageTexture(3, m_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
4628		glUseProgram(m_program);
4629		glUniform1i(glGetUniformLocation(m_program, "g_true"), GL_TRUE);
4630		GLfloat values[16] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f };
4631		glUniform1fv(glGetUniformLocation(m_program, "g_values"), 16, values);
4632		glDispatchCompute(1, 1, 1);
4633
4634		/* validate render target */
4635		{
4636			std::vector<vec4> data(4 * 4);
4637			glBindTexture(GL_TEXTURE_2D, m_texture);
4638			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4639			glGenFramebuffers(1, &m_fbo);
4640			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4641			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
4642			std::vector<GLubyte> colorData(4 * 4 * 4);
4643			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4644			for (int i = 0; i < 4 * 4 * 4; i += 4)
4645			{
4646				data[i / 4] =
4647					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4648						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4649			}
4650			for (std::size_t i = 0; i < data.size(); ++i)
4651			{
4652				if (!IsEqual(data[i], vec4(0, 1, 0, 1)))
4653				{
4654					m_context.getTestContext().getLog()
4655						<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
4656					return ERROR;
4657				}
4658			}
4659		}
4660		return NO_ERROR;
4661	}
4662	virtual long Cleanup()
4663	{
4664		glUseProgram(0);
4665		glDeleteProgram(m_program);
4666		glDeleteTextures(1, &m_texture);
4667		glDeleteFramebuffers(1, &m_fbo);
4668		return NO_ERROR;
4669	}
4670};
4671
4672class AdvancedSharedMax : public ComputeShaderBase
4673{
4674	virtual std::string Title()
4675	{
4676		return NL "Shared Memory - 16K";
4677	}
4678	virtual std::string Purpose()
4679	{
4680		return NL "Support for 16K of shared memory is required by the OpenGL specifaction. Verify if an "
4681				  "implementation supports it.";
4682	}
4683	virtual std::string Method()
4684	{
4685		return NL "Create and dispatch CS which uses 16K of shared memory.";
4686	}
4687	virtual std::string PassCriteria()
4688	{
4689		return NL "Everything works as expected.";
4690	}
4691
4692	GLuint m_program;
4693	GLuint m_buffer;
4694
4695	virtual long Setup()
4696	{
4697		m_program = 0;
4698		m_buffer  = 0;
4699		return NO_ERROR;
4700	}
4701	virtual long Run()
4702	{
4703		const char* const glsl_cs = NL
4704			"layout(local_size_x = 64) in;" NL
4705			"shared struct Type { vec4 v[16]; } g_shared[64];" // 16384 bytes of shared memory
4706			NL "layout(std430) buffer Output {" NL "  Type g_output[64];" NL "};" NL NL "void main() {" NL
4707			"  int id = int(gl_GlobalInvocationID.x);" NL
4708			"  g_shared[id].v = vec4[16](vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), "
4709			"vec4(1.0)," NL "                            vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), "
4710			"vec4(1.0), vec4(1.0), vec4(1.0));" NL "  memoryBarrierShared();" NL "  barrier();" NL NL
4711			"  vec4 sum = vec4(0.0);" NL "  int sum_count = 0;" NL "  for (int i = id - 6; i < id + 9; ++i) {" NL
4712			"    if (id >= 0 && id < g_shared.length()) {" NL "      sum += g_shared[id].v[0];" NL
4713			"      sum += g_shared[id].v[1];" NL "      sum += g_shared[id].v[2];" NL
4714			"      sum += g_shared[id].v[3];" NL "      sum += g_shared[id].v[4];" NL
4715			"      sum += g_shared[id].v[5];" NL "      sum += g_shared[id].v[6];" NL
4716			"      sum += g_shared[id].v[7];" NL "      sum += g_shared[id].v[8];" NL
4717			"      sum += g_shared[id].v[9];" NL "      sum += g_shared[id].v[10];" NL
4718			"      sum += g_shared[id].v[11];" NL "      sum += g_shared[id].v[12];" NL
4719			"      sum += g_shared[id].v[13];" NL "      sum += g_shared[id].v[14];" NL
4720			"      sum += g_shared[id].v[15];" NL "      sum_count += 16;" NL "    }" NL "  }" NL
4721			"  sum = abs((sum / float(sum_count)) - vec4(1.0));" NL
4722			"  if (sum.x > 0.0000001f || sum.y > 0.0000001f || sum.z > 0.0000001f || sum.w > 0.0000001f) return;" NL NL
4723			"  g_output[id] = g_shared[id];" NL "}";
4724
4725		m_program = CreateComputeProgram(glsl_cs);
4726		glLinkProgram(m_program);
4727		if (!CheckProgram(m_program))
4728			return ERROR;
4729
4730		/* init buffer */
4731		{
4732			std::vector<vec4> data(1024);
4733			glGenBuffers(1, &m_buffer);
4734			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
4735			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
4736						 GL_DYNAMIC_COPY);
4737		}
4738
4739		glUseProgram(m_program);
4740		glDispatchCompute(1, 1, 1);
4741		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
4742
4743		long error = NO_ERROR;
4744		/* validate buffer */
4745		{
4746			vec4* data;
4747			data =
4748				static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 1024, GL_MAP_READ_BIT));
4749			for (std::size_t i = 0; i < 1024; ++i)
4750			{
4751				if (!IsEqual(data[i], vec4(1.0f)))
4752				{
4753					m_context.getTestContext().getLog()
4754						<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
4755					error = ERROR;
4756				}
4757			}
4758		}
4759		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4760		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4761		return error;
4762	}
4763	virtual long Cleanup()
4764	{
4765		glUseProgram(0);
4766		glDeleteProgram(m_program);
4767		glDeleteBuffers(1, &m_buffer);
4768		return NO_ERROR;
4769	}
4770};
4771
4772class AdvancedResourcesMax : public ComputeShaderBase
4773{
4774	virtual std::string Title()
4775	{
4776		return NL "Maximum number of resources in one shader";
4777	}
4778	virtual std::string Purpose()
4779	{
4780		return NL "1. Verify that using 4 SSBOs, 12 UBOs, 8 atomic counters" NL "   in one CS works as expected.";
4781	}
4782	virtual std::string Method()
4783	{
4784		return NL "Create and dispatch CS. Verify result.";
4785	}
4786	virtual std::string PassCriteria()
4787	{
4788		return NL "Everything works as expected.";
4789	}
4790
4791	GLuint m_program;
4792	GLuint m_storage_buffer[4];
4793	GLuint m_uniform_buffer[12];
4794	GLuint m_atomic_buffer;
4795
4796	bool RunIteration(GLuint index)
4797	{
4798		for (GLuint i = 0; i < 4; ++i)
4799		{
4800			const GLuint data = i + 1;
4801			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
4802			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
4803		}
4804		for (GLuint i = 0; i < 12; ++i)
4805		{
4806			const GLuint data = i + 1;
4807			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
4808			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
4809		}
4810		{
4811			GLuint data[8];
4812			for (GLuint i = 0; i < 8; ++i)
4813				data[i]   = i + 1;
4814			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_buffer);
4815			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
4816		}
4817
4818		glUseProgram(m_program);
4819		glUniform1ui(glGetUniformLocation(m_program, "g_index"), index);
4820		/* uniform array */
4821		{
4822			std::vector<GLuint> data(480);
4823			for (GLuint i = 0; i < static_cast<GLuint>(data.size()); ++i)
4824				data[i]   = i + 1;
4825			glUniform1uiv(glGetUniformLocation(m_program, "g_uniform_def"), static_cast<GLsizei>(data.size()),
4826						  &data[0]);
4827		}
4828		glDispatchCompute(1, 1, 1);
4829		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
4830
4831		bool ret = true;
4832		/* validate buffer */
4833		{
4834			GLuint* data;
4835			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[index]);
4836			data = static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint), GL_MAP_READ_BIT));
4837			if (data[0] != (index + 1) * 4)
4838			{
4839				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[0] << " should be "
4840													<< ((index + 1) * 4) << tcu::TestLog::EndMessage;
4841				ret = false;
4842			}
4843		}
4844		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4845		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4846		return ret;
4847	}
4848	virtual long Setup()
4849	{
4850		m_program = 0;
4851		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
4852		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
4853		m_atomic_buffer = 0;
4854		return NO_ERROR;
4855	}
4856	virtual long Run()
4857	{
4858		const char* const glsl_cs =
4859			NL "layout(local_size_x = 1) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
4860			   "  uint data;" NL "} g_shader_storage[4];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
4861			   "  uint data;" NL "} g_uniform[12];" NL
4862			   "layout(binding = 0, offset =  0) uniform atomic_uint g_atomic_counter0;" NL
4863			   "layout(binding = 0, offset =  4) uniform atomic_uint g_atomic_counter1;" NL
4864			   "layout(binding = 0, offset =  8) uniform atomic_uint g_atomic_counter2;" NL
4865			   "layout(binding = 0, offset = 12) uniform atomic_uint g_atomic_counter3;" NL
4866			   "layout(binding = 0, offset = 16) uniform atomic_uint g_atomic_counter4;" NL
4867			   "layout(binding = 0, offset = 20) uniform atomic_uint g_atomic_counter5;" NL
4868			   "layout(binding = 0, offset = 24) uniform atomic_uint g_atomic_counter6;" NL
4869			   "layout(binding = 0, offset = 28) uniform atomic_uint g_atomic_counter7;" NL
4870			   "uniform uint g_uniform_def[480];" NL "uniform uint g_index;" NL NL "uint Add() {" NL
4871			   "  switch (g_index) {" NL "    case 0u: return atomicCounter(g_atomic_counter0);" NL
4872			   "    case 1u: return atomicCounter(g_atomic_counter1);" NL
4873			   "    case 2u: return atomicCounter(g_atomic_counter2);" NL
4874			   "    case 3u: return atomicCounter(g_atomic_counter3);" NL
4875			   "    case 4u: return atomicCounter(g_atomic_counter4);" NL
4876			   "    case 5u: return atomicCounter(g_atomic_counter5);" NL
4877			   "    case 6u: return atomicCounter(g_atomic_counter6);" NL
4878			   "    case 7u: return atomicCounter(g_atomic_counter7);" NL "  }" NL "}" NL "void main() {" NL
4879			   "  switch (g_index) {" NL "    case 0u: {" NL "      g_shader_storage[0].data += g_uniform[0].data;" NL
4880			   "      g_shader_storage[0].data += Add();" NL "      g_shader_storage[0].data += g_uniform_def[0];" NL
4881			   "      break;" NL "    }" NL "    case 1u: {" NL
4882			   "      g_shader_storage[1].data += g_uniform[1].data;" NL "      g_shader_storage[1].data += Add();" NL
4883			   "      g_shader_storage[1].data += g_uniform_def[1];" NL "      break;" NL "    }" NL "    case 2u: {" NL
4884			   "      g_shader_storage[2].data += g_uniform[2].data;" NL "      g_shader_storage[2].data += Add();" NL
4885			   "      g_shader_storage[2].data += g_uniform_def[2];" NL "      break;" NL "    }" NL "    case 3u: {" NL
4886			   "      g_shader_storage[3].data += g_uniform[3].data;" NL "      g_shader_storage[3].data += Add();" NL
4887			   "      g_shader_storage[3].data += g_uniform_def[3];" NL "      break;" NL "    }" NL "  }" NL "}";
4888		m_program = CreateComputeProgram(glsl_cs);
4889		glLinkProgram(m_program);
4890		if (!CheckProgram(m_program))
4891			return ERROR;
4892
4893		glGenBuffers(4, m_storage_buffer);
4894		glGenBuffers(12, m_uniform_buffer);
4895		glGenBuffers(1, &m_atomic_buffer);
4896
4897		if (!RunIteration(0))
4898			return ERROR;
4899		if (!RunIteration(1))
4900			return ERROR;
4901		if (!RunIteration(3))
4902			return ERROR;
4903
4904		return NO_ERROR;
4905	}
4906	virtual long Cleanup()
4907	{
4908		glUseProgram(0);
4909		glDeleteProgram(m_program);
4910		glDeleteBuffers(4, m_storage_buffer);
4911		glDeleteBuffers(12, m_uniform_buffer);
4912		glDeleteBuffers(1, &m_atomic_buffer);
4913		return NO_ERROR;
4914	}
4915};
4916
4917class NegativeAPINoActiveProgram : public ComputeShaderBase
4918{
4919	virtual std::string Title()
4920	{
4921		return NL "API errors - no active program";
4922	}
4923	virtual std::string Purpose()
4924	{
4925		return NL "Verify that appropriate errors are generated by the OpenGL API.";
4926	}
4927	virtual std::string Method()
4928	{
4929		return NL "";
4930	}
4931	virtual std::string PassCriteria()
4932	{
4933		return NL "";
4934	}
4935
4936	GLuint m_program;
4937
4938	virtual long Setup()
4939	{
4940		m_program = 0;
4941		return NO_ERROR;
4942	}
4943	virtual long Run()
4944	{
4945		glDispatchCompute(1, 2, 3);
4946		if (glGetError() != GL_INVALID_OPERATION)
4947		{
4948			m_context.getTestContext().getLog()
4949				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
4950											"DispatchComputeIndirect if there is no active program for the compute\n"
4951											"shader stage."
4952				<< tcu::TestLog::EndMessage;
4953			return ERROR;
4954		}
4955
4956		/* indirect dispatch */
4957		{
4958			GLuint		 buffer;
4959			const GLuint num_group[3] = { 3, 2, 1 };
4960			glGenBuffers(1, &buffer);
4961			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
4962			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
4963			glDispatchComputeIndirect(0);
4964			glDeleteBuffers(1, &buffer);
4965			if (glGetError() != GL_INVALID_OPERATION)
4966			{
4967				m_context.getTestContext().getLog()
4968					<< tcu::TestLog::Message
4969					<< "INVALID_OPERATION is generated by DispatchCompute or\n"
4970					   "DispatchComputeIndirect if there is no active program for the compute\n"
4971					   "shader stage."
4972					<< tcu::TestLog::EndMessage;
4973				return ERROR;
4974			}
4975		}
4976
4977		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
4978									   "  gl_Position = g_position;" NL "}";
4979		const char* const glsl_fs =
4980			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
4981		m_program = CreateProgram(glsl_vs, glsl_fs);
4982		glLinkProgram(m_program);
4983		if (!CheckProgram(m_program))
4984			return ERROR;
4985
4986		glUseProgram(m_program);
4987
4988		glDispatchCompute(1, 2, 3);
4989		if (glGetError() != GL_INVALID_OPERATION)
4990		{
4991			m_context.getTestContext().getLog()
4992				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
4993											"DispatchComputeIndirect if there is no active program for the compute\n"
4994											"shader stage."
4995				<< tcu::TestLog::EndMessage;
4996			return ERROR;
4997		}
4998
4999		/* indirect dispatch */
5000		{
5001			GLuint		 buffer;
5002			const GLuint num_group[3] = { 3, 2, 1 };
5003			glGenBuffers(1, &buffer);
5004			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
5005			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
5006			glDispatchComputeIndirect(0);
5007			glDeleteBuffers(1, &buffer);
5008			if (glGetError() != GL_INVALID_OPERATION)
5009			{
5010				m_context.getTestContext().getLog()
5011					<< tcu::TestLog::Message
5012					<< "INVALID_OPERATION is generated by DispatchCompute or\n"
5013					   "DispatchComputeIndirect if there is no active program for the compute\n"
5014					   "shader stage."
5015					<< tcu::TestLog::EndMessage;
5016				return ERROR;
5017			}
5018		}
5019
5020		return NO_ERROR;
5021	}
5022	virtual long Cleanup()
5023	{
5024		glUseProgram(0);
5025		glDeleteProgram(m_program);
5026		return NO_ERROR;
5027	}
5028};
5029
5030class NegativeAPIWorkGroupCount : public ComputeShaderBase
5031{
5032	virtual std::string Title()
5033	{
5034		return NL "API errors - invalid work group count";
5035	}
5036	virtual std::string Purpose()
5037	{
5038		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5039	}
5040	virtual std::string Method()
5041	{
5042		return NL "";
5043	}
5044	virtual std::string PassCriteria()
5045	{
5046		return NL "";
5047	}
5048
5049	GLuint m_program;
5050	GLuint m_storage_buffer;
5051
5052	virtual long Setup()
5053	{
5054		m_program		 = 0;
5055		m_storage_buffer = 0;
5056		return NO_ERROR;
5057	}
5058	virtual long Run()
5059	{
5060		const char* const glsl_cs =
5061			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
5062			   "void main() {" NL
5063			   "  g_output[gl_GlobalInvocationID.x * gl_GlobalInvocationID.y * gl_GlobalInvocationID.z] = 0u;" NL "}";
5064		m_program = CreateComputeProgram(glsl_cs);
5065		glLinkProgram(m_program);
5066		if (!CheckProgram(m_program))
5067			return ERROR;
5068
5069		glGenBuffers(1, &m_storage_buffer);
5070		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5071		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
5072
5073		GLint x, y, z;
5074		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &x);
5075		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &y);
5076		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &z);
5077
5078		glUseProgram(m_program);
5079
5080		glDispatchCompute(x + 1, 1, 1);
5081		if (glGetError() != GL_INVALID_VALUE)
5082		{
5083			m_context.getTestContext().getLog()
5084				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5085											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5086											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5087				<< tcu::TestLog::EndMessage;
5088			return ERROR;
5089		}
5090
5091		glDispatchCompute(1, y + 1, 1);
5092		if (glGetError() != GL_INVALID_VALUE)
5093		{
5094			m_context.getTestContext().getLog()
5095				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5096											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5097											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5098				<< tcu::TestLog::EndMessage;
5099			return ERROR;
5100		}
5101
5102		glDispatchCompute(1, 1, z + 1);
5103		if (glGetError() != GL_INVALID_VALUE)
5104		{
5105			m_context.getTestContext().getLog()
5106				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5107											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5108											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5109				<< tcu::TestLog::EndMessage;
5110			return ERROR;
5111		}
5112
5113		return NO_ERROR;
5114	}
5115	virtual long Cleanup()
5116	{
5117		glUseProgram(0);
5118		glDeleteProgram(m_program);
5119		glDeleteBuffers(1, &m_storage_buffer);
5120		return NO_ERROR;
5121	}
5122};
5123
5124class NegativeAPIIndirect : public ComputeShaderBase
5125{
5126	virtual std::string Title()
5127	{
5128		return NL "API errors - incorrect DispatchComputeIndirect usage";
5129	}
5130	virtual std::string Purpose()
5131	{
5132		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5133	}
5134	virtual std::string Method()
5135	{
5136		return NL "";
5137	}
5138	virtual std::string PassCriteria()
5139	{
5140		return NL "";
5141	}
5142
5143	GLuint m_program;
5144	GLuint m_storage_buffer;
5145	GLuint m_dispatch_buffer;
5146
5147	virtual long Setup()
5148	{
5149		m_program		  = 0;
5150		m_storage_buffer  = 0;
5151		m_dispatch_buffer = 0;
5152		return NO_ERROR;
5153	}
5154
5155	virtual long Run()
5156	{
5157		const char* const glsl_cs =
5158			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
5159			   "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0u;" NL "}";
5160		m_program = CreateComputeProgram(glsl_cs);
5161		glLinkProgram(m_program);
5162		if (!CheckProgram(m_program))
5163			return ERROR;
5164		glUseProgram(m_program);
5165
5166		glGenBuffers(1, &m_storage_buffer);
5167		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5168		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
5169
5170		const GLuint num_groups[6] = { 1, 1, 1, 1, 1, 1 };
5171		glGenBuffers(1, &m_dispatch_buffer);
5172		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
5173		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_COPY);
5174
5175		glDispatchComputeIndirect(-2);
5176		if (glGetError() != GL_INVALID_VALUE)
5177		{
5178			m_context.getTestContext().getLog()
5179				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
5180											"less than zero or not a multiple of four."
5181				<< tcu::TestLog::EndMessage;
5182			return ERROR;
5183		}
5184
5185		glDispatchComputeIndirect(3);
5186		if (glGetError() != GL_INVALID_VALUE)
5187		{
5188			m_context.getTestContext().getLog()
5189				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
5190											"less than zero or not a multiple of four."
5191				<< tcu::TestLog::EndMessage;
5192			return ERROR;
5193		}
5194
5195		glDispatchComputeIndirect(16);
5196		if (glGetError() != GL_INVALID_OPERATION)
5197		{
5198			m_context.getTestContext().getLog()
5199				<< tcu::TestLog::Message
5200				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
5201				   "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
5202				   "beyond the end of the bound buffer object."
5203				<< tcu::TestLog::EndMessage;
5204			return ERROR;
5205		}
5206
5207		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
5208		glDispatchComputeIndirect(0);
5209		if (glGetError() != GL_INVALID_OPERATION)
5210		{
5211			m_context.getTestContext().getLog()
5212				<< tcu::TestLog::Message
5213				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
5214				   "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
5215				   "beyond the end of the bound buffer object."
5216				<< tcu::TestLog::EndMessage;
5217			return ERROR;
5218		}
5219
5220		return NO_ERROR;
5221	}
5222	virtual long Cleanup()
5223	{
5224		glUseProgram(0);
5225		glDeleteProgram(m_program);
5226		glDeleteBuffers(1, &m_storage_buffer);
5227		glDeleteBuffers(1, &m_dispatch_buffer);
5228		return NO_ERROR;
5229	}
5230};
5231
5232class NegativeAPIProgram : public ComputeShaderBase
5233{
5234	virtual std::string Title()
5235	{
5236		return NL "API errors - program state";
5237	}
5238	virtual std::string Purpose()
5239	{
5240		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5241	}
5242	virtual std::string Method()
5243	{
5244		return NL "";
5245	}
5246	virtual std::string PassCriteria()
5247	{
5248		return NL "";
5249	}
5250
5251	GLuint m_program;
5252	GLuint m_storage_buffer;
5253
5254	virtual long Setup()
5255	{
5256		m_program		 = 0;
5257		m_storage_buffer = 0;
5258		return NO_ERROR;
5259	}
5260	virtual long Run()
5261	{
5262		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
5263									   "  gl_Position = g_position;" NL "}";
5264		const char* const glsl_fs =
5265			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
5266		m_program = CreateProgram(glsl_vs, glsl_fs);
5267
5268		GLint v[3];
5269		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
5270		if (glGetError() != GL_INVALID_OPERATION)
5271		{
5272			m_context.getTestContext().getLog()
5273				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
5274											"COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
5275											"successfully, or has been linked but contains no compute shaders."
5276				<< tcu::TestLog::EndMessage;
5277			return ERROR;
5278		}
5279
5280		glLinkProgram(m_program);
5281		if (!CheckProgram(m_program))
5282			return ERROR;
5283
5284		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
5285		if (glGetError() != GL_INVALID_OPERATION)
5286		{
5287			m_context.getTestContext().getLog()
5288				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
5289											"COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
5290											"successfully, or has been linked but contains no compute shaders."
5291				<< tcu::TestLog::EndMessage;
5292			return ERROR;
5293		}
5294		glDeleteProgram(m_program);
5295
5296		const char* const glsl_cs =
5297			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
5298			"  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
5299		m_program = glCreateProgram();
5300
5301		GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
5302		glAttachShader(m_program, sh);
5303		glDeleteShader(sh);
5304		glShaderSource(sh, 1, &glsl_cs, NULL);
5305		glCompileShader(sh);
5306
5307		sh = glCreateShader(GL_VERTEX_SHADER);
5308		glAttachShader(m_program, sh);
5309		glDeleteShader(sh);
5310		glShaderSource(sh, 1, &glsl_vs, NULL);
5311		glCompileShader(sh);
5312
5313		sh = glCreateShader(GL_FRAGMENT_SHADER);
5314		glAttachShader(m_program, sh);
5315		glDeleteShader(sh);
5316		glShaderSource(sh, 1, &glsl_fs, NULL);
5317		glCompileShader(sh);
5318
5319		glLinkProgram(m_program);
5320		GLint status;
5321		glGetProgramiv(m_program, GL_LINK_STATUS, &status);
5322		if (status == GL_TRUE)
5323		{
5324			m_context.getTestContext().getLog()
5325				<< tcu::TestLog::Message << "LinkProgram will fail if <program> contains a combination of compute and\n"
5326											"non-compute shaders."
5327				<< tcu::TestLog::EndMessage;
5328			return ERROR;
5329		}
5330
5331		return NO_ERROR;
5332	}
5333	virtual long Cleanup()
5334	{
5335		glUseProgram(0);
5336		glDeleteProgram(m_program);
5337		glDeleteBuffers(1, &m_storage_buffer);
5338		return NO_ERROR;
5339	}
5340};
5341
5342class NegativeGLSLCompileTimeErrors : public ComputeShaderBase
5343{
5344	virtual std::string Title()
5345	{
5346		return NL "Compile-time errors";
5347	}
5348	virtual std::string Purpose()
5349	{
5350		return NL "Verify that appropriate errors are generated by the GLSL compiler.";
5351	}
5352	virtual std::string Method()
5353	{
5354		return NL "";
5355	}
5356	virtual std::string PassCriteria()
5357	{
5358		return NL "";
5359	}
5360
5361	static std::string Shader1(int x, int y, int z)
5362	{
5363		std::stringstream ss;
5364		ss << "#version 310 es" NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
5365		   << ") in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5366			  "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
5367		return ss.str();
5368	}
5369	virtual long Run()
5370	{
5371		// gl_GlobalInvocationID requires "#version 310" or later
5372		if (!Compile("#version 300 es" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
5373					 "  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL
5374					 "}"))
5375			return ERROR;
5376
5377		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(local_size_x = 2) in;" NL
5378					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5379					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
5380			return ERROR;
5381
5382		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "in uint x;" NL
5383					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5384					 "  g_output[gl_GlobalInvocationID.x] = x;" NL "}"))
5385			return ERROR;
5386
5387		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "out uint x;" NL
5388					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5389					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "  x = 0;" NL "}"))
5390			return ERROR;
5391
5392		{
5393			GLint x, y, z;
5394			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &x);
5395			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &y);
5396			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &z);
5397
5398			if (!Compile(Shader1(x + 1, 1, 1)))
5399				return ERROR;
5400			if (!Compile(Shader1(1, y + 1, 1)))
5401				return ERROR;
5402			if (!Compile(Shader1(1, 1, z + 1)))
5403				return ERROR;
5404		}
5405
5406		return NO_ERROR;
5407	}
5408
5409	bool Compile(const std::string& source)
5410	{
5411		const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
5412
5413		const char* const src = source.c_str();
5414		glShaderSource(sh, 1, &src, NULL);
5415		glCompileShader(sh);
5416
5417		GLchar log[1024];
5418		glGetShaderInfoLog(sh, sizeof(log), NULL, log);
5419		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Shader Info Log:\n"
5420											<< log << tcu::TestLog::EndMessage;
5421
5422		GLint status;
5423		glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
5424		glDeleteShader(sh);
5425
5426		if (status == GL_TRUE)
5427		{
5428			m_context.getTestContext().getLog()
5429				<< tcu::TestLog::Message << "Compilation should fail." << tcu::TestLog::EndMessage;
5430			return false;
5431		}
5432
5433		return true;
5434	}
5435};
5436
5437class NegativeGLSLLinkTimeErrors : public ComputeShaderBase
5438{
5439	virtual std::string Title()
5440	{
5441		return NL "Link-time errors";
5442	}
5443	virtual std::string Purpose()
5444	{
5445		return NL "Verify that appropriate errors are generated by the GLSL linker.";
5446	}
5447	virtual std::string Method()
5448	{
5449		return NL "";
5450	}
5451	virtual std::string PassCriteria()
5452	{
5453		return NL "";
5454	}
5455
5456	virtual long Run()
5457	{
5458		const char* const glsl_cs =
5459			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
5460			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
5461		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
5462									   "  gl_Position = g_position;" NL "}";
5463		const char* const glsl_fs =
5464			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
5465
5466		GLuint p = CreateComputeProgram(glsl_cs);
5467
5468		{
5469			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
5470			glAttachShader(p, sh);
5471			glDeleteShader(sh);
5472			const char* const src[2] = { kGLSLVer, glsl_vs };
5473			glShaderSource(sh, 2, src, NULL);
5474			glCompileShader(sh);
5475		}
5476		{
5477			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
5478			glAttachShader(p, sh);
5479			glDeleteShader(sh);
5480			const char* const src[2] = { kGLSLVer, glsl_fs };
5481			glShaderSource(sh, 2, src, NULL);
5482			glCompileShader(sh);
5483		}
5484		long error = NO_ERROR;
5485		glLinkProgram(p);
5486		if (CheckProgram(p))
5487			error = ERROR;
5488
5489		/* no layout */
5490		const char* const glsl_cs2 = NL "layout(std430) buffer Output {" NL "  vec4 data;" NL "} g_out;" NL
5491										"void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
5492
5493		GLuint p2 = CreateComputeProgram(glsl_cs2);
5494		glLinkProgram(p2);
5495		if (CheckProgram(p2))
5496			error = ERROR;
5497
5498		glDeleteProgram(p);
5499		glDeleteProgram(p2);
5500		return error;
5501	}
5502};
5503
5504class BasicWorkGroupSizeIsConst : public ComputeShaderBase
5505{
5506	virtual std::string Title()
5507	{
5508		return NL "gl_WorkGroupSize is an constant";
5509	}
5510	virtual std::string Purpose()
5511	{
5512		return NL "Verify that gl_WorkGroupSize can be used as an constant expression.";
5513	}
5514	virtual std::string Method()
5515	{
5516		return NL "";
5517	}
5518	virtual std::string PassCriteria()
5519	{
5520		return NL "";
5521	}
5522
5523	GLuint m_program;
5524	GLuint m_storage_buffer;
5525
5526	virtual long Setup()
5527	{
5528		m_program		 = 0;
5529		m_storage_buffer = 0;
5530		return NO_ERROR;
5531	}
5532
5533	virtual long Run()
5534	{
5535		const char* const glsl_cs =
5536			NL "layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;" NL
5537			   "layout(std430, binding = 0) buffer Output {" NL "  uint g_buffer[22u + gl_WorkGroupSize.x];" NL "};" NL
5538			   "shared uint g_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];" NL
5539			   "uniform uint g_uniform[gl_WorkGroupSize.z + 20u];" NL "void main() {" NL
5540			   "  g_shared[gl_LocalInvocationIndex] = 1U;" NL "  groupMemoryBarrier();" NL "  barrier();" NL
5541			   "  uint sum = 0u;" NL
5542			   "  for (uint i = 0u; i < gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; ++i) {" NL
5543			   "    sum += g_shared[i];" NL "  }" NL "  sum += g_uniform[gl_LocalInvocationIndex];" NL
5544			   "  g_buffer[gl_LocalInvocationIndex] = sum;" NL "}";
5545		m_program = CreateComputeProgram(glsl_cs);
5546		glLinkProgram(m_program);
5547		if (!CheckProgram(m_program))
5548			return ERROR;
5549
5550		glGenBuffers(1, &m_storage_buffer);
5551		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5552		glBufferData(GL_SHADER_STORAGE_BUFFER, 24 * sizeof(GLuint), NULL, GL_STATIC_DRAW);
5553
5554		glUseProgram(m_program);
5555		GLuint values[24] = { 1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u,
5556							  13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u };
5557		glUniform1uiv(glGetUniformLocation(m_program, "g_uniform"), 24, values);
5558		glDispatchCompute(1, 1, 1);
5559		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5560
5561		long	error = NO_ERROR;
5562		GLuint* data;
5563		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
5564		data =
5565			static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 24, GL_MAP_READ_BIT));
5566		for (GLuint i = 0; i < 24; ++i)
5567		{
5568			if (data[i] != (i + 25))
5569			{
5570				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
5571													<< data[i] << " should be" << (i + 25) << tcu::TestLog::EndMessage;
5572				error = ERROR;
5573			}
5574		}
5575		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
5576		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
5577		return error;
5578	}
5579
5580	virtual long Cleanup()
5581	{
5582		glUseProgram(0);
5583		glDeleteProgram(m_program);
5584		glDeleteBuffers(1, &m_storage_buffer);
5585		return NO_ERROR;
5586	}
5587};
5588
5589} // anonymous namespace
5590
5591ComputeShaderTests::ComputeShaderTests(glcts::Context& context) : TestCaseGroup(context, "compute_shader", "")
5592{
5593}
5594
5595ComputeShaderTests::~ComputeShaderTests(void)
5596{
5597}
5598
5599void ComputeShaderTests::init()
5600{
5601	using namespace glcts;
5602	addChild(new TestSubcase(m_context, "simple-compute", TestSubcase::Create<SimpleCompute>));
5603	addChild(new TestSubcase(m_context, "one-work-group", TestSubcase::Create<BasicOneWorkGroup>));
5604	addChild(new TestSubcase(m_context, "resource-ubo", TestSubcase::Create<BasicResourceUBO>));
5605	addChild(new TestSubcase(m_context, "resource-texture", TestSubcase::Create<BasicResourceTexture>));
5606	addChild(new TestSubcase(m_context, "resource-image", TestSubcase::Create<BasicResourceImage>));
5607	addChild(new TestSubcase(m_context, "resource-atomic-counter", TestSubcase::Create<BasicResourceAtomicCounter>));
5608	addChild(new TestSubcase(m_context, "resource-uniform", TestSubcase::Create<BasicResourceUniform>));
5609	addChild(new TestSubcase(m_context, "built-in-variables", TestSubcase::Create<BasicBuiltinVariables>));
5610	addChild(new TestSubcase(m_context, "max", TestSubcase::Create<BasicMax>));
5611	addChild(new TestSubcase(m_context, "work-group-size", TestSubcase::Create<BasicWorkGroupSizeIsConst>));
5612	addChild(new TestSubcase(m_context, "build-separable", TestSubcase::Create<BasicBuildSeparable>));
5613	addChild(new TestSubcase(m_context, "shared-simple", TestSubcase::Create<BasicSharedSimple>));
5614	addChild(new TestSubcase(m_context, "shared-struct", TestSubcase::Create<BasicSharedStruct>));
5615	addChild(new TestSubcase(m_context, "dispatch-indirect", TestSubcase::Create<BasicDispatchIndirect>));
5616	addChild(new TestSubcase(m_context, "sso-compute-pipeline", TestSubcase::Create<BasicSSOComputePipeline>));
5617	addChild(new TestSubcase(m_context, "sso-case2", TestSubcase::Create<BasicSSOCase2>));
5618	addChild(new TestSubcase(m_context, "sso-case3", TestSubcase::Create<BasicSSOCase3>));
5619	addChild(new TestSubcase(m_context, "atomic-case1", TestSubcase::Create<BasicAtomicCase1>));
5620	addChild(new TestSubcase(m_context, "atomic-case2", TestSubcase::Create<BasicAtomicCase2>));
5621	addChild(new TestSubcase(m_context, "atomic-case3", TestSubcase::Create<BasicAtomicCase3>));
5622	addChild(new TestSubcase(m_context, "copy-image", TestSubcase::Create<AdvancedCopyImage>));
5623	addChild(new TestSubcase(m_context, "pipeline-pre-vs", TestSubcase::Create<AdvancedPipelinePreVS>));
5624	addChild(
5625		new TestSubcase(m_context, "pipeline-gen-draw-commands", TestSubcase::Create<AdvancedPipelineGenDrawCommands>));
5626	addChild(new TestSubcase(m_context, "pipeline-compute-chain", TestSubcase::Create<AdvancedPipelineComputeChain>));
5627	addChild(new TestSubcase(m_context, "pipeline-post-fs", TestSubcase::Create<AdvancedPipelinePostFS>));
5628	addChild(new TestSubcase(m_context, "pipeline-post-xfb", TestSubcase::Create<AdvancedPipelinePostXFB>));
5629	addChild(new TestSubcase(m_context, "shared-indexing", TestSubcase::Create<AdvancedSharedIndexing>));
5630	addChild(new TestSubcase(m_context, "shared-max", TestSubcase::Create<AdvancedSharedMax>));
5631	addChild(new TestSubcase(m_context, "resources-max", TestSubcase::Create<AdvancedResourcesMax>));
5632	addChild(new TestSubcase(m_context, "api-no-active-program", TestSubcase::Create<NegativeAPINoActiveProgram>));
5633	addChild(new TestSubcase(m_context, "api-work-group-count", TestSubcase::Create<NegativeAPIWorkGroupCount>));
5634	addChild(new TestSubcase(m_context, "api-indirect", TestSubcase::Create<NegativeAPIIndirect>));
5635	addChild(new TestSubcase(m_context, "api-program", TestSubcase::Create<NegativeAPIProgram>));
5636	addChild(
5637		new TestSubcase(m_context, "glsl-compile-time-errors", TestSubcase::Create<NegativeGLSLCompileTimeErrors>));
5638	addChild(new TestSubcase(m_context, "glsl-link-time-errors", TestSubcase::Create<NegativeGLSLLinkTimeErrors>));
5639	addChild(new TestSubcase(m_context, "api-attach-shader", TestSubcase::Create<NegativeAttachShader>));
5640}
5641} // glcts namespace
5642