1/*-------------------------------------------------------------------------
2 * OpenGL Conformance Test Suite
3 * -----------------------------
4 *
5 * Copyright (c) 2014-2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 */ /*!
20 * \file
21 * \brief
22 */ /*-------------------------------------------------------------------*/
23
24#include "gl4cComputeShaderTests.hpp"
25#include "glwEnums.hpp"
26#include "glwFunctions.hpp"
27#include "tcuMatrix.hpp"
28#include "tcuMatrixUtil.hpp"
29#include "tcuRenderTarget.hpp"
30#include <cmath>
31#include <cstdarg>
32#include <sstream>
33
34namespace gl4cts
35{
36
37using namespace glw;
38using tcu::Vec2;
39using tcu::Vec3;
40using tcu::Vec4;
41using tcu::UVec4;
42using tcu::UVec3;
43using tcu::Mat4;
44
45namespace
46{
47
48typedef Vec3  vec2;
49typedef Vec3  vec3;
50typedef Vec4  vec4;
51typedef UVec3 uvec3;
52typedef UVec4 uvec4;
53typedef Mat4  mat4;
54
55const char* const kGLSLVer = "#version 430 core\n";
56
57class ComputeShaderBase : public deqp::SubcaseBase
58{
59
60public:
61	virtual ~ComputeShaderBase()
62	{
63	}
64
65	ComputeShaderBase()
66		: renderTarget(m_context.getRenderContext().getRenderTarget()), pixelFormat(renderTarget.getPixelFormat())
67	{
68		float epsilon_zero = 1.f / (1 << 13);
69		if (pixelFormat.redBits != 0 && pixelFormat.greenBits != 0 && pixelFormat.blueBits != 0 &&
70			pixelFormat.alphaBits != 0)
71		{
72			g_color_eps = vec4(1.f / ((float)(1 << pixelFormat.redBits) - 1.0f),
73							   1.f / ((float)(1 << pixelFormat.greenBits) - 1.0f),
74							   1.f / ((float)(1 << pixelFormat.blueBits) - 1.0f),
75							   1.f / ((float)(1 << pixelFormat.alphaBits) - 1.0f)) +
76						  vec4(epsilon_zero);
77		}
78		else if (pixelFormat.redBits != 0 && pixelFormat.greenBits != 0 && pixelFormat.blueBits != 0)
79		{
80			g_color_eps = vec4(1.f / ((float)(1 << pixelFormat.redBits) - 1.0f),
81							   1.f / ((float)(1 << pixelFormat.greenBits) - 1.0f),
82							   1.f / ((float)(1 << pixelFormat.blueBits) - 1.0f), 1.f) +
83						  vec4(epsilon_zero);
84		}
85		else
86		{
87			g_color_eps = vec4(epsilon_zero);
88		}
89	}
90
91	const tcu::RenderTarget& renderTarget;
92	const tcu::PixelFormat&  pixelFormat;
93	vec4					 g_color_eps;
94
95	uvec3 IndexTo3DCoord(GLuint idx, GLuint max_x, GLuint max_y)
96	{
97		const GLuint x = idx % max_x;
98		idx /= max_x;
99		const GLuint y = idx % max_y;
100		idx /= max_y;
101		const GLuint z = idx;
102		return uvec3(x, y, z);
103	}
104
105	bool CheckProgram(GLuint program, bool* compile_error = NULL)
106	{
107		GLint compile_status = GL_TRUE;
108		GLint status		 = GL_TRUE;
109		glGetProgramiv(program, GL_LINK_STATUS, &status);
110
111		if (status == GL_FALSE)
112		{
113			GLint attached_shaders;
114			glGetProgramiv(program, GL_ATTACHED_SHADERS, &attached_shaders);
115
116			if (attached_shaders > 0)
117			{
118				std::vector<GLuint> shaders(attached_shaders);
119				glGetAttachedShaders(program, attached_shaders, NULL, &shaders[0]);
120
121				for (GLint i = 0; i < attached_shaders; ++i)
122				{
123					GLenum type;
124					glGetShaderiv(shaders[i], GL_SHADER_TYPE, reinterpret_cast<GLint*>(&type));
125					switch (type)
126					{
127					case GL_VERTEX_SHADER:
128						m_context.getTestContext().getLog()
129							<< tcu::TestLog::Message << "*** Vertex Shader ***" << tcu::TestLog::EndMessage;
130						break;
131					case GL_TESS_CONTROL_SHADER:
132						m_context.getTestContext().getLog()
133							<< tcu::TestLog::Message << "*** Tessellation Control Shader ***"
134							<< tcu::TestLog::EndMessage;
135						break;
136					case GL_TESS_EVALUATION_SHADER:
137						m_context.getTestContext().getLog()
138							<< tcu::TestLog::Message << "*** Tessellation Evaluation Shader ***"
139							<< tcu::TestLog::EndMessage;
140						break;
141					case GL_GEOMETRY_SHADER:
142						m_context.getTestContext().getLog()
143							<< tcu::TestLog::Message << "*** Geometry Shader ***" << tcu::TestLog::EndMessage;
144						break;
145					case GL_FRAGMENT_SHADER:
146						m_context.getTestContext().getLog()
147							<< tcu::TestLog::Message << "*** Fragment Shader ***" << tcu::TestLog::EndMessage;
148						break;
149					case GL_COMPUTE_SHADER:
150						m_context.getTestContext().getLog()
151							<< tcu::TestLog::Message << "*** Compute Shader ***" << tcu::TestLog::EndMessage;
152						break;
153					default:
154						m_context.getTestContext().getLog()
155							<< tcu::TestLog::Message << "*** Unknown Shader ***" << tcu::TestLog::EndMessage;
156						break;
157					}
158
159					GLint res;
160					glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &res);
161					if (res != GL_TRUE)
162						compile_status = res;
163
164					GLint length;
165					glGetShaderiv(shaders[i], GL_SHADER_SOURCE_LENGTH, &length);
166					if (length > 0)
167					{
168						std::vector<GLchar> source(length);
169						glGetShaderSource(shaders[i], length, NULL, &source[0]);
170						m_context.getTestContext().getLog()
171							<< tcu::TestLog::Message << &source[0] << tcu::TestLog::EndMessage;
172					}
173
174					glGetShaderiv(shaders[i], GL_INFO_LOG_LENGTH, &length);
175					if (length > 0)
176					{
177						std::vector<GLchar> log(length);
178						glGetShaderInfoLog(shaders[i], length, NULL, &log[0]);
179						m_context.getTestContext().getLog()
180							<< tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
181					}
182				}
183			}
184
185			GLint length;
186			glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
187			if (length > 0)
188			{
189				std::vector<GLchar> log(length);
190				glGetProgramInfoLog(program, length, NULL, &log[0]);
191				m_context.getTestContext().getLog() << tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
192			}
193		}
194
195		if (compile_error)
196			*compile_error = (compile_status == GL_TRUE ? false : true);
197		if (compile_status != GL_TRUE)
198			return false;
199		return status == GL_TRUE ? true : false;
200	}
201
202	GLuint CreateComputeProgram(const std::string& cs)
203	{
204		const GLuint p = glCreateProgram();
205
206		if (!cs.empty())
207		{
208			const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
209			glAttachShader(p, sh);
210			glDeleteShader(sh);
211			const char* const src[2] = { kGLSLVer, cs.c_str() };
212			glShaderSource(sh, 2, src, NULL);
213			glCompileShader(sh);
214		}
215
216		return p;
217	}
218
219	GLuint CreateProgram(const std::string& vs, const std::string& fs)
220	{
221		const GLuint p = glCreateProgram();
222
223		if (!vs.empty())
224		{
225			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
226			glAttachShader(p, sh);
227			glDeleteShader(sh);
228			const char* const src[2] = { kGLSLVer, vs.c_str() };
229			glShaderSource(sh, 2, src, NULL);
230			glCompileShader(sh);
231		}
232		if (!fs.empty())
233		{
234			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
235			glAttachShader(p, sh);
236			glDeleteShader(sh);
237			const char* const src[2] = { kGLSLVer, fs.c_str() };
238			glShaderSource(sh, 2, src, NULL);
239			glCompileShader(sh);
240		}
241
242		return p;
243	}
244
245	GLuint BuildShaderProgram(GLenum type, const std::string& source)
246	{
247		const char* const src[2] = { kGLSLVer, source.c_str() };
248		return glCreateShaderProgramv(type, 2, src);
249	}
250
251	GLfloat distance(GLfloat p0, GLfloat p1)
252	{
253		return de::abs(p0 - p1);
254	}
255
256	inline bool ColorEqual(const vec4& c0, const vec4& c1, const vec4& epsilon)
257	{
258		if (distance(c0.x(), c1.x()) > epsilon.x())
259			return false;
260		if (distance(c0.y(), c1.y()) > epsilon.y())
261			return false;
262		if (distance(c0.z(), c1.z()) > epsilon.z())
263			return false;
264		if (distance(c0.w(), c1.w()) > epsilon.w())
265			return false;
266		return true;
267	}
268
269	inline bool ColorEqual(const vec3& c0, const vec3& c1, const vec4& epsilon)
270	{
271		if (distance(c0.x(), c1.x()) > epsilon.x())
272			return false;
273		if (distance(c0.y(), c1.y()) > epsilon.y())
274			return false;
275		if (distance(c0.z(), c1.z()) > epsilon.z())
276			return false;
277		return true;
278	}
279
280	bool ValidateReadBuffer(int x, int y, int w, int h, const vec4& expected)
281	{
282		std::vector<vec4> display(w * h);
283		glReadPixels(x, y, w, h, GL_RGBA, GL_FLOAT, &display[0]);
284
285		for (int j = 0; j < h; ++j)
286		{
287			for (int i = 0; i < w; ++i)
288			{
289				if (!ColorEqual(display[j * w + i], expected, g_color_eps))
290				{
291					m_context.getTestContext().getLog()
292						<< tcu::TestLog::Message << "Color at (" << (x + i) << ", " << (y + j) << ") is ["
293						<< display[j * w + i].x() << ", " << display[j * w + i].y() << ", " << display[j * w + i].z()
294						<< ", " << display[j * w + i].w() << "] should be [" << expected.x() << ", " << expected.y()
295						<< ", " << expected.z() << ", " << expected.w() << "]." << tcu::TestLog::EndMessage;
296					return false;
297				}
298			}
299		}
300
301		return true;
302	}
303
304	bool ValidateReadBufferCenteredQuad(int width, int height, const vec3& expected)
305	{
306		bool			  result = true;
307		std::vector<vec3> fb(width * height);
308		glReadPixels(0, 0, width, height, GL_RGB, GL_FLOAT, &fb[0]);
309
310		int startx = int(((float)width * 0.1f) + 1);
311		int starty = int(((float)height * 0.1f) + 1);
312		int endx   = int((float)width - 2 * (((float)width * 0.1f) + 1) - 1);
313		int endy   = int((float)height - 2 * (((float)height * 0.1f) + 1) - 1);
314
315		for (int y = starty; y < endy; ++y)
316		{
317			for (int x = startx; x < endx; ++x)
318			{
319				const int idx = y * width + x;
320				if (!ColorEqual(fb[idx], expected, g_color_eps))
321				{
322					return false;
323				}
324			}
325		}
326
327		if (!ColorEqual(fb[2 * width + 2], vec3(0), g_color_eps))
328		{
329			result = false;
330		}
331		if (!ColorEqual(fb[2 * width + (width - 3)], vec3(0), g_color_eps))
332		{
333			result = false;
334		}
335		if (!ColorEqual(fb[(height - 3) * width + (width - 3)], vec3(0), g_color_eps))
336		{
337			result = false;
338		}
339		if (!ColorEqual(fb[(height - 3) * width + 2], vec3(0), g_color_eps))
340		{
341			result = false;
342		}
343
344		return result;
345	}
346
347	int getWindowWidth()
348	{
349		return renderTarget.getWidth();
350	}
351
352	int getWindowHeight()
353	{
354		return renderTarget.getHeight();
355	}
356
357	bool ValidateWindow4Quads(const vec3& lb, const vec3& rb, const vec3& rt, const vec3& lt)
358	{
359		int				  width  = 100;
360		int				  height = 100;
361		std::vector<vec3> fb(width * height);
362		glReadPixels(0, 0, width, height, GL_RGB, GL_FLOAT, &fb[0]);
363
364		bool status = true;
365
366		// left-bottom quad
367		for (int y = 10; y < height / 2 - 10; ++y)
368		{
369			for (int x = 10; x < width / 2 - 10; ++x)
370			{
371				const int idx = y * width + x;
372				if (!ColorEqual(fb[idx], lb, g_color_eps))
373				{
374					m_context.getTestContext().getLog()
375						<< tcu::TestLog::Message << "First bad color (" << x << ", " << y << "): " << fb[idx].x() << " "
376						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
377					status = false;
378				}
379			}
380		}
381		// right-bottom quad
382		for (int y = 10; y < height / 2 - 10; ++y)
383		{
384			for (int x = width / 2 + 10; x < width - 10; ++x)
385			{
386				const int idx = y * width + x;
387				if (!ColorEqual(fb[idx], rb, g_color_eps))
388				{
389					m_context.getTestContext().getLog()
390						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
391						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
392					status = false;
393				}
394			}
395		}
396		// right-top quad
397		for (int y = height / 2 + 10; y < height - 10; ++y)
398		{
399			for (int x = width / 2 + 10; x < width - 10; ++x)
400			{
401				const int idx = y * width + x;
402				if (!ColorEqual(fb[idx], rt, g_color_eps))
403				{
404					m_context.getTestContext().getLog()
405						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
406						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
407					status = false;
408				}
409			}
410		}
411		// left-top quad
412		for (int y = height / 2 + 10; y < height - 10; ++y)
413		{
414			for (int x = 10; x < width / 2 - 10; ++x)
415			{
416				const int idx = y * width + x;
417				if (!ColorEqual(fb[idx], lt, g_color_eps))
418				{
419					m_context.getTestContext().getLog()
420						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
421						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
422					status = false;
423				}
424			}
425		}
426		// middle horizontal line should be black
427		for (int y = height / 2 - 2; y < height / 2 + 2; ++y)
428		{
429			for (int x = 0; x < width; ++x)
430			{
431				const int idx = y * width + x;
432				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
433				{
434					m_context.getTestContext().getLog()
435						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
436						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
437					status = false;
438				}
439			}
440		}
441		// middle vertical line should be black
442		for (int y = 0; y < height; ++y)
443		{
444			for (int x = width / 2 - 2; x < width / 2 + 2; ++x)
445			{
446				const int idx = y * width + x;
447				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
448				{
449					m_context.getTestContext().getLog()
450						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
451						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
452					status = false;
453				}
454			}
455		}
456
457		return status;
458	}
459
460	bool IsEqual(vec4 a, vec4 b)
461	{
462		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
463	}
464
465	bool IsEqual(uvec4 a, uvec4 b)
466	{
467		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
468	}
469};
470
471class SimpleCompute : public ComputeShaderBase
472{
473
474	virtual std::string Title()
475	{
476		return "Simplest possible Compute Shader";
477	}
478
479	virtual std::string Purpose()
480	{
481		return "1. Verify that CS can be created, compiled and linked.\n"
482			   "2. Verify that local work size can be queried with GetProgramiv command.\n"
483			   "3. Verify that CS can be dispatched with DispatchCompute command.\n"
484			   "4. Verify that CS can write to SSBO.";
485	}
486
487	virtual std::string Method()
488	{
489		return "Create and dispatch CS. Verify SSBO content.";
490	}
491
492	virtual std::string PassCriteria()
493	{
494		return "Everything works as expected.";
495	}
496
497	GLuint m_program;
498	GLuint m_buffer;
499
500	virtual long Setup()
501	{
502
503		const char* const glsl_cs =
504			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
505			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
506		m_program = CreateComputeProgram(glsl_cs);
507		glLinkProgram(m_program);
508		if (!CheckProgram(m_program))
509			return ERROR;
510
511		GLint v[3];
512		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
513		if (v[0] != 1 || v[1] != 1 || v[2] != 1)
514		{
515			m_context.getTestContext().getLog()
516				<< tcu::TestLog::Message << "Got " << v[0] << ", " << v[1] << ", " << v[2]
517				<< ", expected: 1, 1, 1 in GL_COMPUTE_WORK_GROUP_SIZE check" << tcu::TestLog::EndMessage;
518			return ERROR;
519		}
520
521		glGenBuffers(1, &m_buffer);
522		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
523		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), NULL, GL_DYNAMIC_DRAW);
524		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
525
526		return NO_ERROR;
527	}
528
529	virtual long Run()
530	{
531		glUseProgram(m_program);
532		glDispatchCompute(1, 1, 1);
533
534		vec4* data;
535		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer);
536		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
537		data	   = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
538		long error = NO_ERROR;
539		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
540		{
541			error = ERROR;
542		}
543		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
544		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
545		return error;
546	}
547
548	virtual long Cleanup()
549	{
550		glUseProgram(0);
551		glDeleteProgram(m_program);
552		glDeleteBuffers(1, &m_buffer);
553		return NO_ERROR;
554	}
555};
556
557class BasicOneWorkGroup : public ComputeShaderBase
558{
559
560	virtual std::string Title()
561	{
562		return "One work group with various local sizes";
563	}
564
565	virtual std::string Purpose()
566	{
567		return NL "1. Verify that declared local work size has correct effect." NL
568				  "2. Verify that the number of shader invocations is correct." NL
569				  "3. Verify that the built-in variables: gl_WorkGroupSize, gl_WorkGroupID, gl_GlobalInvocationID," NL
570				  "    gl_LocalInvocationID and gl_LocalInvocationIndex has correct values." NL
571				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands work as expected.";
572	}
573
574	virtual std::string Method()
575	{
576		return NL "1. Create several CS with various local sizes." NL
577				  "2. Dispatch each CS with DispatchCompute and DispatchComputeIndirect commands." NL
578				  "3. Verify SSBO content.";
579	}
580
581	virtual std::string PassCriteria()
582	{
583		return "Everything works as expected.";
584	}
585
586	GLuint m_program;
587	GLuint m_storage_buffer;
588	GLuint m_dispatch_buffer;
589
590	std::string GenSource(int x, int y, int z, GLuint binding)
591	{
592		std::stringstream ss;
593		ss << NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
594		   << ") in;" NL "layout(std430, binding = " << binding
595		   << ") buffer Output {" NL "  uvec4 local_id[];" NL "} g_out;" NL "void main() {" NL
596			  "  if (gl_WorkGroupSize == uvec3("
597		   << x << ", " << y << ", " << z
598		   << ") && gl_WorkGroupID == uvec3(0) &&" NL "      gl_GlobalInvocationID == gl_LocalInvocationID) {" NL
599			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(gl_LocalInvocationID, 0);" NL "  } else {" NL
600			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(0xffff);" NL "  }" NL "}";
601		return ss.str();
602	}
603
604	bool RunIteration(int local_size_x, int local_size_y, int local_size_z, GLuint binding, bool dispatch_indirect)
605	{
606		if (m_program != 0)
607			glDeleteProgram(m_program);
608		m_program = CreateComputeProgram(GenSource(local_size_x, local_size_y, local_size_z, binding));
609		glLinkProgram(m_program);
610		if (!CheckProgram(m_program))
611			return false;
612
613		GLint v[3];
614		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
615		if (v[0] != local_size_x || v[1] != local_size_y || v[2] != local_size_z)
616		{
617			m_context.getTestContext().getLog()
618				<< tcu::TestLog::Message << "GL_COMPUTE_LOCAL_WORK_SIZE is (" << v[0] << " " << v[1] << " " << v[2]
619				<< ") should be (" << local_size_x << " " << local_size_y << " " << local_size_z << ")"
620				<< tcu::TestLog::EndMessage;
621			return false;
622		}
623
624		const int kSize = local_size_x * local_size_y * local_size_z;
625
626		if (m_storage_buffer == 0)
627			glGenBuffers(1, &m_storage_buffer);
628		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, m_storage_buffer);
629		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kSize, NULL, GL_DYNAMIC_DRAW);
630		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
631
632		glUseProgram(m_program);
633		if (dispatch_indirect)
634		{
635			const GLuint num_groups[3] = { 1, 1, 1 };
636			if (m_dispatch_buffer == 0)
637				glGenBuffers(1, &m_dispatch_buffer);
638			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
639			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_DRAW);
640			glDispatchComputeIndirect(0);
641		}
642		else
643		{
644			glDispatchCompute(1, 1, 1);
645		}
646
647		uvec4* data;
648		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
649		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
650		data =
651			static_cast<uvec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kSize * sizeof(uvec4), GL_MAP_READ_BIT));
652
653		bool ret = true;
654
655		for (int z = 0; z < local_size_z; ++z)
656		{
657			for (int y = 0; y < local_size_y; ++y)
658			{
659				for (int x = 0; x < local_size_x; ++x)
660				{
661					const int index = z * local_size_x * local_size_y + y * local_size_x + x;
662					if (!IsEqual(data[index], uvec4(x, y, z, 0)))
663					{
664						m_context.getTestContext().getLog()
665							<< tcu::TestLog::Message << "Invalid data at offset " << index << tcu::TestLog::EndMessage;
666						ret = false;
667					}
668				}
669			}
670		}
671		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
672		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
673		return ret;
674	}
675
676	virtual long Setup()
677	{
678		m_program		  = 0;
679		m_storage_buffer  = 0;
680		m_dispatch_buffer = 0;
681		return NO_ERROR;
682	}
683
684	virtual long Run()
685	{
686		if (!RunIteration(16, 1, 1, 0, true))
687			return ERROR;
688		if (!RunIteration(8, 8, 1, 1, false))
689			return ERROR;
690		if (!RunIteration(4, 4, 4, 2, true))
691			return ERROR;
692		if (!RunIteration(1, 2, 3, 3, false))
693			return ERROR;
694		if (!RunIteration(1024, 1, 1, 3, true))
695			return ERROR;
696		if (!RunIteration(16, 8, 8, 3, false))
697			return ERROR;
698		if (!RunIteration(32, 1, 32, 7, true))
699			return ERROR;
700		return NO_ERROR;
701	}
702
703	virtual long Cleanup()
704	{
705		glUseProgram(0);
706		glDeleteProgram(m_program);
707		glDeleteBuffers(1, &m_storage_buffer);
708		glDeleteBuffers(1, &m_dispatch_buffer);
709		return NO_ERROR;
710	}
711};
712
713class BasicResourceUBO : public ComputeShaderBase
714{
715
716	virtual std::string Title()
717	{
718		return "Compute Shader resources - UBOs";
719	}
720
721	virtual std::string Purpose()
722	{
723		return "Verify that CS is able to read data from UBOs and write it to SSBO.";
724	}
725
726	virtual std::string Method()
727	{
728		return NL "1. Create CS which uses array of UBOs." NL
729				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
730				  "3. Read data from each UBO and write it to SSBO." NL "4. Verify SSBO content." NL
731				  "5. Repeat for different buffer and CS work sizes.";
732	}
733
734	virtual std::string PassCriteria()
735	{
736		return "Everything works as expected.";
737	}
738
739	GLuint m_program;
740	GLuint m_storage_buffer;
741	GLuint m_uniform_buffer[12];
742	GLuint m_dispatch_buffer;
743
744	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
745	{
746		const uvec3		  global_size = local_size * num_groups;
747		std::stringstream ss;
748		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
749		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
750		   << ", " << global_size.y() << ", " << global_size.z()
751		   << ");" NL "layout(std140) uniform InputBuffer {" NL "  vec4 data["
752		   << global_size.x() * global_size.y() * global_size.z()
753		   << "];" NL "} g_in_buffer[12];" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
754		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
755		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
756		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
757		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
758		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
759		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
760		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
761		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
762		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data9["
763		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data10["
764		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data11["
765		   << global_size.x() * global_size.y() * global_size.z()
766		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  const uint global_index = gl_GlobalInvocationID.x +" NL
767			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
768			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
769			  "  g_out_buffer.data0[global_index] = g_in_buffer[0].data[global_index];" NL
770			  "  g_out_buffer.data1[global_index] = g_in_buffer[1].data[global_index];" NL
771			  "  g_out_buffer.data2[global_index] = g_in_buffer[2].data[global_index];" NL
772			  "  g_out_buffer.data3[global_index] = g_in_buffer[3].data[global_index];" NL
773			  "  g_out_buffer.data4[global_index] = g_in_buffer[4].data[global_index];" NL
774			  "  g_out_buffer.data5[global_index] = g_in_buffer[5].data[global_index];" NL
775			  "  g_out_buffer.data6[global_index] = g_in_buffer[6].data[global_index];" NL
776			  "  g_out_buffer.data7[global_index] = g_in_buffer[7].data[global_index];" NL
777			  "  g_out_buffer.data8[global_index] = g_in_buffer[8].data[global_index];" NL
778			  "  g_out_buffer.data9[global_index] = g_in_buffer[9].data[global_index];" NL
779			  "  g_out_buffer.data10[global_index] = g_in_buffer[10].data[global_index];" NL
780			  "  g_out_buffer.data11[global_index] = g_in_buffer[11].data[global_index];" NL "}";
781		return ss.str();
782	}
783
784	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
785	{
786		if (m_program != 0)
787			glDeleteProgram(m_program);
788		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
789		glLinkProgram(m_program);
790		if (!CheckProgram(m_program))
791			return false;
792
793		for (GLuint i = 0; i < 12; ++i)
794		{
795			char name[32];
796			sprintf(name, "InputBuffer[%u]", i);
797			const GLuint index = glGetUniformBlockIndex(m_program, name);
798			glUniformBlockBinding(m_program, index, i);
799			GLint p = 0;
800			glGetActiveUniformBlockiv(m_program, index, GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER, &p);
801			if (p == GL_FALSE)
802			{
803				m_context.getTestContext().getLog()
804					<< tcu::TestLog::Message << "UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER should be TRUE."
805					<< tcu::TestLog::EndMessage;
806				return false;
807			}
808		}
809
810		const GLuint kBufferSize =
811			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
812
813		if (m_storage_buffer == 0)
814			glGenBuffers(1, &m_storage_buffer);
815		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
816		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 12, NULL, GL_DYNAMIC_DRAW);
817		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
818
819		if (m_uniform_buffer[0] == 0)
820			glGenBuffers(12, m_uniform_buffer);
821		for (GLuint i = 0; i < 12; ++i)
822		{
823			std::vector<vec4> data(kBufferSize);
824			for (GLuint j = 0; j < kBufferSize; ++j)
825			{
826				data[j] = vec4(static_cast<float>(i * kBufferSize + j));
827			}
828			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
829			glBufferData(GL_UNIFORM_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
830		}
831		glBindBuffer(GL_UNIFORM_BUFFER, 0);
832
833		glUseProgram(m_program);
834		if (dispatch_indirect)
835		{
836			if (m_dispatch_buffer == 0)
837				glGenBuffers(1, &m_dispatch_buffer);
838			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
839			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
840			glDispatchComputeIndirect(0);
841		}
842		else
843		{
844			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
845		}
846
847		std::vector<vec4> data(kBufferSize * 12);
848		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
849		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
850		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize * 12, &data[0]);
851
852		for (GLuint z = 0; z < local_size.z() * num_groups.z(); ++z)
853		{
854			for (GLuint y = 0; y < local_size.y() * num_groups.y(); ++y)
855			{
856				for (GLuint x = 0; x < local_size.x() * num_groups.x(); ++x)
857				{
858					const GLuint index = z * local_size.x() * num_groups.x() * local_size.y() * num_groups.y() +
859										 y * local_size.x() * num_groups.x() + x;
860					for (int i = 0; i < 1; ++i)
861					{
862						if (!IsEqual(data[index * 12 + i], vec4(static_cast<float>(index * 12 + i))))
863						{
864							m_context.getTestContext().getLog() << tcu::TestLog::Message << "Incorrect data at offset "
865																<< index * 12 + i << "." << tcu::TestLog::EndMessage;
866							return false;
867						}
868					}
869				}
870			}
871		}
872		return true;
873	}
874
875	virtual long Setup()
876	{
877		m_program		 = 0;
878		m_storage_buffer = 0;
879		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
880		m_dispatch_buffer = 0;
881		return NO_ERROR;
882	}
883
884	virtual long Run()
885	{
886		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
887			return ERROR;
888		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), true))
889			return ERROR;
890		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
891			return ERROR;
892		return NO_ERROR;
893	}
894
895	virtual long Cleanup()
896	{
897		glUseProgram(0);
898		glDeleteProgram(m_program);
899		glDeleteBuffers(1, &m_storage_buffer);
900		glDeleteBuffers(12, m_uniform_buffer);
901		glDeleteBuffers(1, &m_dispatch_buffer);
902		return NO_ERROR;
903	}
904};
905
906class BasicResourceTexture : public ComputeShaderBase
907{
908
909	virtual std::string Title()
910	{
911		return NL "Compute Shader resources - Textures";
912	}
913
914	virtual std::string Purpose()
915	{
916		return NL "Verify that texture access works correctly in CS.";
917	}
918
919	virtual std::string Method()
920	{
921		return NL "1. Create CS which uses all sampler types (sampler1D, sampler2D, sampler3D, sampler2DRect," NL
922				  "    sampler1DArray, sampler2DArray, samplerBuffer, sampler2DMS, sampler2DMSArray)." NL
923				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
924				  "3. Sample each texture and write sampled value to SSBO." NL "4. Verify SSBO content." NL
925				  "5. Repeat for different texture and CS work sizes.";
926	}
927
928	virtual std::string PassCriteria()
929	{
930		return NL "Everything works as expected.";
931	}
932
933	GLuint m_program;
934	GLuint m_storage_buffer;
935	GLuint m_texture[9];
936	GLuint m_texture_buffer;
937	GLuint m_dispatch_buffer;
938
939	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
940	{
941		const uvec3		  global_size = local_size * num_groups;
942		std::stringstream ss;
943		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
944		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
945		   << ", " << global_size.y() << ", " << global_size.z()
946		   << ");" NL "uniform sampler1D g_sampler0;" NL "uniform sampler2D g_sampler1;" NL
947			  "uniform sampler3D g_sampler2;" NL "uniform sampler2DRect g_sampler3;" NL
948			  "uniform sampler1DArray g_sampler4;" NL "uniform sampler2DArray g_sampler5;" NL
949			  "uniform samplerBuffer g_sampler6;" NL "uniform sampler2DMS g_sampler7;" NL
950			  "uniform sampler2DMSArray g_sampler8;" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
951		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
952		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
953		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
954		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
955		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
956		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
957		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
958		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
959		   << global_size.x() * global_size.y() * global_size.z()
960		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  const uint global_index = gl_GlobalInvocationID.x +" NL
961			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
962			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
963			  "  g_out_buffer.data0[global_index] = texelFetch(g_sampler0, int(gl_GlobalInvocationID), 0);" NL
964			  "  g_out_buffer.data1[global_index] = texture(g_sampler1, vec2(gl_GlobalInvocationID) / "
965			  "vec2(kGlobalSize));" NL "  g_out_buffer.data2[global_index] = textureProj(g_sampler2, "
966			  "vec4(vec3(gl_GlobalInvocationID) / vec3(kGlobalSize), 1.0));" NL
967			  "  g_out_buffer.data3[global_index] = textureProjOffset(g_sampler3, vec3(vec2(gl_GlobalInvocationID), "
968			  "1.0), ivec2(0));" NL "  g_out_buffer.data4[global_index] = textureLodOffset(g_sampler4, "
969			  "vec2(gl_GlobalInvocationID.x / kGlobalSize.x, gl_GlobalInvocationID.y), 0.0, "
970			  "0);" NL "  g_out_buffer.data5[global_index] = texelFetchOffset(g_sampler5, "
971			  "ivec3(gl_GlobalInvocationID), 0, ivec2(0));" NL
972			  "  g_out_buffer.data6[global_index] = texelFetch(g_sampler6, int(global_index));" NL
973			  "  g_out_buffer.data7[global_index] = texelFetch(g_sampler7, ivec2(gl_GlobalInvocationID), 1);" NL
974			  "  g_out_buffer.data8[global_index] = texelFetch(g_sampler8, ivec3(gl_GlobalInvocationID), 2);" NL "}";
975		return ss.str();
976	}
977
978	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
979	{
980		if (m_program != 0)
981			glDeleteProgram(m_program);
982		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
983		glLinkProgram(m_program);
984		if (!CheckProgram(m_program))
985			return false;
986
987		glUseProgram(m_program);
988		for (int i = 0; i < 9; ++i)
989		{
990			char name[32];
991			sprintf(name, "g_sampler%d", i);
992			glUniform1i(glGetUniformLocation(m_program, name), i);
993		}
994		glUseProgram(0);
995
996		const GLuint kBufferSize =
997			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
998		const GLint kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
999		const GLint kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1000		const GLint kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1001
1002		std::vector<vec4> buffer_data(kBufferSize * 9);
1003		if (m_storage_buffer == 0)
1004			glGenBuffers(1, &m_storage_buffer);
1005		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1006		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 9, &buffer_data[0], GL_DYNAMIC_DRAW);
1007		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1008
1009		std::vector<vec4> texture_data(kBufferSize, vec4(123.0f));
1010		if (m_texture[0] == 0)
1011			glGenTextures(9, m_texture);
1012		if (m_texture_buffer == 0)
1013			glGenBuffers(1, &m_texture_buffer);
1014
1015		glActiveTexture(GL_TEXTURE0);
1016		glBindTexture(GL_TEXTURE_1D, m_texture[0]);
1017		glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1018		glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1019		glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, kWidth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1020
1021		glActiveTexture(GL_TEXTURE1);
1022		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
1023		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1024		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1025		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1026
1027		glActiveTexture(GL_TEXTURE2);
1028		glBindTexture(GL_TEXTURE_3D, m_texture[2]);
1029		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1030		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1031		glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1032
1033		glActiveTexture(GL_TEXTURE3);
1034		glBindTexture(GL_TEXTURE_RECTANGLE, m_texture[3]);
1035		glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1036		glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1037		glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1038
1039		glActiveTexture(GL_TEXTURE4);
1040		glBindTexture(GL_TEXTURE_1D_ARRAY, m_texture[4]);
1041		glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1042		glTexParameteri(GL_TEXTURE_1D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1043		glTexImage2D(GL_TEXTURE_1D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1044
1045		glActiveTexture(GL_TEXTURE5);
1046		glBindTexture(GL_TEXTURE_2D_ARRAY, m_texture[5]);
1047		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1048		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1049		glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT,
1050					 &texture_data[0]);
1051
1052		glActiveTexture(GL_TEXTURE6);
1053		glBindBuffer(GL_TEXTURE_BUFFER, m_texture_buffer);
1054		glBufferData(GL_TEXTURE_BUFFER, kBufferSize * sizeof(vec4), &texture_data[0], GL_DYNAMIC_DRAW);
1055		glBindBuffer(GL_TEXTURE_BUFFER, 0);
1056		glBindTexture(GL_TEXTURE_BUFFER, m_texture[6]);
1057		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, m_texture_buffer);
1058
1059		glActiveTexture(GL_TEXTURE7);
1060		glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, m_texture[7]);
1061		glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, 4, GL_RGBA32F, kWidth, kHeight, GL_FALSE);
1062
1063		glActiveTexture(GL_TEXTURE8);
1064		glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, m_texture[8]);
1065		glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, 4, GL_RGBA32F, kWidth, kHeight, kDepth, GL_FALSE);
1066
1067		// clear MS textures
1068		GLuint fbo;
1069		glGenFramebuffers(1, &fbo);
1070		glBindFramebuffer(GL_FRAMEBUFFER, fbo);
1071		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texture[7], 0);
1072		glClearBufferfv(GL_COLOR, 0, &vec4(123.0f)[0]);
1073		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_texture[8], 0);
1074		glClearBufferfv(GL_COLOR, 0, &vec4(123.0f)[0]);
1075		glDeleteFramebuffers(1, &fbo);
1076
1077		glUseProgram(m_program);
1078		if (dispatch_indirect)
1079		{
1080			if (m_dispatch_buffer == 0)
1081				glGenBuffers(1, &m_dispatch_buffer);
1082			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1083			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1084			glDispatchComputeIndirect(0);
1085		}
1086		else
1087		{
1088			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1089		}
1090
1091		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1092		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1093		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize * 9, &buffer_data[0]);
1094		for (GLuint index = 0; index < kBufferSize * 9; ++index)
1095		{
1096			if (!IsEqual(buffer_data[index], vec4(123.0f)))
1097			{
1098				m_context.getTestContext().getLog()
1099					<< tcu::TestLog::Message << "Incorrect data at index " << index << "." << tcu::TestLog::EndMessage;
1100				return false;
1101			}
1102		}
1103		return true;
1104	}
1105
1106	virtual long Setup()
1107	{
1108		m_program		 = 0;
1109		m_storage_buffer = 0;
1110		memset(m_texture, 0, sizeof(m_texture));
1111		m_texture_buffer  = 0;
1112		m_dispatch_buffer = 0;
1113		return NO_ERROR;
1114	}
1115
1116	virtual long Run()
1117	{
1118		if (!RunIteration(uvec3(4, 4, 4), uvec3(8, 1, 1), false))
1119			return ERROR;
1120		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), true))
1121			return ERROR;
1122		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), false))
1123			return ERROR;
1124		return NO_ERROR;
1125	}
1126
1127	virtual long Cleanup()
1128	{
1129		glActiveTexture(GL_TEXTURE0);
1130		glUseProgram(0);
1131		glDeleteProgram(m_program);
1132		glDeleteBuffers(1, &m_storage_buffer);
1133		glDeleteTextures(9, m_texture);
1134		glDeleteBuffers(1, &m_texture_buffer);
1135		glDeleteBuffers(1, &m_dispatch_buffer);
1136		return NO_ERROR;
1137	}
1138};
1139
1140class BasicResourceImage : public ComputeShaderBase
1141{
1142
1143	virtual std::string Title()
1144	{
1145		return NL "Compute Shader resources - Images";
1146	}
1147
1148	virtual std::string Purpose()
1149	{
1150		return NL "Verify that reading/writing GPU memory via image variables work as expected.";
1151	}
1152
1153	virtual std::string Method()
1154	{
1155		return NL "1. Create CS which uses two image2D variables to read and write underlying GPU memory." NL
1156				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1157				  "3. Verify memory content." NL "4. Repeat for different texture and CS work sizes.";
1158	}
1159
1160	virtual std::string PassCriteria()
1161	{
1162		return NL "Everything works as expected.";
1163	}
1164
1165	GLuint m_program;
1166	GLuint m_draw_program;
1167	GLuint m_texture[2];
1168	GLuint m_dispatch_buffer;
1169	GLuint m_vertex_array;
1170
1171	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1172	{
1173		const uvec3		  global_size = local_size * num_groups;
1174		std::stringstream ss;
1175		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1176		   << ", local_size_z = " << local_size.z()
1177		   << ") in;" NL "layout(rgba32f) coherent uniform image2D g_image1;" NL
1178			  "layout(rgba32f) uniform image2D g_image2;" NL "const uvec3 kGlobalSize = uvec3("
1179		   << global_size.x() << ", " << global_size.y() << ", " << global_size.z()
1180		   << ");" NL "void main() {" NL
1181			  "  if (gl_GlobalInvocationID.x >= kGlobalSize.x || gl_GlobalInvocationID.y >= kGlobalSize.y) return;" NL
1182			  "  vec4 color = vec4(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y) / 255.0;" NL
1183			  "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), color);" NL
1184			  "  vec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
1185			  "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
1186		return ss.str();
1187	}
1188
1189	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1190	{
1191		if (m_program != 0)
1192			glDeleteProgram(m_program);
1193		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1194		glLinkProgram(m_program);
1195		if (!CheckProgram(m_program))
1196			return false;
1197
1198		glUseProgram(m_program);
1199		glUniform1i(glGetUniformLocation(m_program, "g_image1"), 0);
1200		glUniform1i(glGetUniformLocation(m_program, "g_image2"), 1);
1201		glUseProgram(0);
1202
1203		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1204		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1205		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1206		const GLuint kSize   = kWidth * kHeight * kDepth;
1207
1208		std::vector<vec4> data(kSize);
1209		if (m_texture[0] == 0)
1210			glGenTextures(2, m_texture);
1211
1212		for (int i = 0; i < 2; ++i)
1213		{
1214			glBindTexture(GL_TEXTURE_2D, m_texture[i]);
1215			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
1216			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &data[0]);
1217		}
1218		glBindTexture(GL_TEXTURE_2D, 0);
1219
1220		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
1221		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
1222		glUseProgram(m_program);
1223		if (dispatch_indirect)
1224		{
1225			if (m_dispatch_buffer == 0)
1226				glGenBuffers(1, &m_dispatch_buffer);
1227			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1228			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1229			glDispatchComputeIndirect(0);
1230		}
1231		else
1232		{
1233			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1234		}
1235		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1236
1237		glClear(GL_COLOR_BUFFER_BIT);
1238		glActiveTexture(GL_TEXTURE0);
1239		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
1240		glActiveTexture(GL_TEXTURE1);
1241		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
1242		glUseProgram(m_draw_program);
1243		glBindVertexArray(m_vertex_array);
1244		glViewport(0, 0, kWidth, kHeight);
1245		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
1246
1247		std::vector<vec4> display(kWidth * kHeight);
1248		glReadPixels(0, 0, kWidth, kHeight, GL_RGBA, GL_FLOAT, &display[0]);
1249
1250		for (int y = 0; y < kHeight; ++y)
1251		{
1252			for (int x = 0; x < kWidth; ++x)
1253			{
1254				if (y >= getWindowHeight() || x >= getWindowWidth())
1255				{
1256					continue;
1257				}
1258				const vec4 c = vec4(float(y + x) / 255.0f);
1259				if (!ColorEqual(display[y * kWidth + x], c, g_color_eps))
1260				{
1261					m_context.getTestContext().getLog()
1262						<< tcu::TestLog::Message << "Got " << display[y * kWidth + x].x() << ", "
1263						<< display[y * kWidth + x].y() << ", " << display[y * kWidth + x].z() << ", "
1264						<< display[y * kWidth + x].w() << ", expected " << c.x() << ", " << c.y() << ", " << c.z()
1265						<< ", " << c.w() << " at " << x << ", " << y << tcu::TestLog::EndMessage;
1266					return false;
1267				}
1268			}
1269		}
1270
1271		return true;
1272	}
1273
1274	virtual long Setup()
1275	{
1276		m_program = 0;
1277		memset(m_texture, 0, sizeof(m_texture));
1278		m_dispatch_buffer = 0;
1279		return NO_ERROR;
1280	}
1281
1282	virtual long Run()
1283	{
1284
1285		const char* const glsl_vs =
1286			NL "out StageData {" NL "  vec2 texcoord;" NL "} vs_out;" NL
1287			   "const vec2 g_quad[] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL "void main() {" NL
1288			   "  gl_Position = vec4(g_quad[gl_VertexID], 0, 1);" NL
1289			   "  vs_out.texcoord = 0.5 + 0.5 * g_quad[gl_VertexID];" NL "}";
1290
1291		const char* glsl_fs =
1292			NL "in StageData {" NL "  vec2 texcoord;" NL "} fs_in;" NL "layout(location = 0) out vec4 o_color;" NL
1293			   "uniform sampler2D g_image1;" NL "uniform sampler2D g_image2;" NL "void main() {" NL
1294			   "  vec4 c1 = texture(g_image1, fs_in.texcoord);" NL "  vec4 c2 = texture(g_image2, fs_in.texcoord);" NL
1295			   "  if (c1 == c2) o_color = c1;" NL "  else o_color = vec4(1, 0, 0, 1);" NL "}";
1296
1297		m_draw_program = CreateProgram(glsl_vs, glsl_fs);
1298		glLinkProgram(m_draw_program);
1299		if (!CheckProgram(m_draw_program))
1300			return ERROR;
1301
1302		glUseProgram(m_draw_program);
1303		glUniform1i(glGetUniformLocation(m_draw_program, "g_image1"), 0);
1304		glUniform1i(glGetUniformLocation(m_draw_program, "g_image2"), 1);
1305		glUseProgram(0);
1306
1307		glGenVertexArrays(1, &m_vertex_array);
1308
1309		if (!pixelFormat.alphaBits)
1310		{
1311			m_context.getTestContext().getLog()
1312				<< tcu::TestLog::Message << "Test requires default framebuffer alpha bits" << tcu::TestLog::EndMessage;
1313			return NO_ERROR;
1314		}
1315
1316		if (!RunIteration(uvec3(8, 16, 1), uvec3(8, 4, 1), true))
1317			return ERROR;
1318		if (!RunIteration(uvec3(4, 32, 1), uvec3(16, 2, 1), false))
1319			return ERROR;
1320		if (!RunIteration(uvec3(16, 4, 1), uvec3(4, 16, 1), false))
1321			return ERROR;
1322		if (!RunIteration(uvec3(8, 8, 1), uvec3(8, 8, 1), true))
1323			return ERROR;
1324
1325		return NO_ERROR;
1326	}
1327
1328	virtual long Cleanup()
1329	{
1330		glUseProgram(0);
1331		glDeleteProgram(m_program);
1332		glDeleteProgram(m_draw_program);
1333		glDeleteVertexArrays(1, &m_vertex_array);
1334		glDeleteTextures(2, m_texture);
1335		glDeleteBuffers(1, &m_dispatch_buffer);
1336		glViewport(0, 0, getWindowWidth(), getWindowHeight());
1337		return NO_ERROR;
1338	}
1339};
1340
1341class BasicResourceAtomicCounter : public ComputeShaderBase
1342{
1343
1344	virtual std::string Title()
1345	{
1346		return "Compute Shader resources - Atomic Counters";
1347	}
1348
1349	virtual std::string Purpose()
1350	{
1351		return NL
1352			"1. Verify that Atomic Counters work as expected in CS." NL
1353			"2. Verify that built-in functions: atomicCounterIncrement and atomicCounterDecrement work correctly." NL
1354			"3. Verify that GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER is accepted by" NL
1355			"    GetActiveAtomicCounterBufferiv command.";
1356	}
1357
1358	virtual std::string Method()
1359	{
1360		return NL
1361			"1. Create CS which uses two atomic_uint variables." NL
1362			"2. In CS write values returned by atomicCounterIncrement and atomicCounterDecrement functions to SSBO." NL
1363			"3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL "4. Verify SSBO content." NL
1364			"5. Repeat for different buffer and CS work sizes.";
1365	}
1366
1367	virtual std::string PassCriteria()
1368	{
1369		return "Everything works as expected.";
1370	}
1371
1372	GLuint m_program;
1373	GLuint m_storage_buffer;
1374	GLuint m_counter_buffer[2];
1375	GLuint m_dispatch_buffer;
1376
1377	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1378	{
1379		const uvec3		  global_size = local_size * num_groups;
1380		std::stringstream ss;
1381		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1382		   << ", local_size_z = " << local_size.z()
1383		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint inc_data["
1384		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uint dec_data["
1385		   << global_size.x() * global_size.y() * global_size.z()
1386		   << "];" NL "};" NL "layout(binding = 0, offset = 0) uniform atomic_uint g_inc_counter;" NL
1387			  "layout(binding = 1, offset = 0) uniform atomic_uint g_dec_counter;" NL "void main() {" NL
1388			  "  const uint index = atomicCounterIncrement(g_inc_counter);" NL "  inc_data[index] = index;" NL
1389			  "  dec_data[index] = atomicCounterDecrement(g_dec_counter);" NL "}";
1390		return ss.str();
1391	}
1392
1393	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1394	{
1395		if (m_program != 0)
1396			glDeleteProgram(m_program);
1397		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1398		glLinkProgram(m_program);
1399		if (!CheckProgram(m_program))
1400			return false;
1401
1402		GLint p[2] = { 0 };
1403		glGetActiveAtomicCounterBufferiv(m_program, 0, GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER, &p[0]);
1404		glGetActiveAtomicCounterBufferiv(m_program, 1, GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER, &p[1]);
1405
1406		if (p[0] == GL_FALSE || p[1] == GL_FALSE)
1407		{
1408			m_context.getTestContext().getLog()
1409				<< tcu::TestLog::Message << "ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER should be TRUE."
1410				<< tcu::TestLog::EndMessage;
1411			return false;
1412		}
1413
1414		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1415		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1416		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1417		const GLuint kSize   = kWidth * kHeight * kDepth;
1418
1419		if (m_storage_buffer == 0)
1420			glGenBuffers(1, &m_storage_buffer);
1421		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1422		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kSize * 2, NULL, GL_DYNAMIC_DRAW);
1423		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1424
1425		if (m_counter_buffer[0] == 0)
1426			glGenBuffers(2, m_counter_buffer);
1427
1428		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer[0]);
1429		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
1430		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = 0;
1431		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1432
1433		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 1, m_counter_buffer[1]);
1434		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
1435		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = kSize;
1436		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1437
1438		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
1439
1440		glUseProgram(m_program);
1441		if (dispatch_indirect)
1442		{
1443			if (m_dispatch_buffer == 0)
1444				glGenBuffers(1, &m_dispatch_buffer);
1445			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1446			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1447			glDispatchComputeIndirect(0);
1448		}
1449		else
1450		{
1451			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1452		}
1453
1454		std::vector<GLuint> data(kSize);
1455		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1456		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1457		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kSize, &data[0]);
1458
1459		for (GLuint i = 0; i < kSize; ++i)
1460		{
1461			if (data[i] != i)
1462			{
1463				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Value at index " << i << " is "
1464													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
1465				return false;
1466			}
1467		}
1468
1469		GLuint value;
1470		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer[0]);
1471		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
1472		if (value != kSize)
1473		{
1474			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value (buffer 0) is "
1475												<< value << " should be " << kSize << "." << tcu::TestLog::EndMessage;
1476			return false;
1477		}
1478
1479		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer[1]);
1480		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
1481		if (value != 0)
1482		{
1483			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value (buffer 1) is "
1484												<< value << " should be 0." << tcu::TestLog::EndMessage;
1485			return false;
1486		}
1487
1488		return true;
1489	}
1490
1491	virtual long Setup()
1492	{
1493		m_program		 = 0;
1494		m_storage_buffer = 0;
1495		memset(m_counter_buffer, 0, sizeof(m_counter_buffer));
1496		m_dispatch_buffer = 0;
1497		return NO_ERROR;
1498	}
1499
1500	virtual long Run()
1501	{
1502		if (!RunIteration(uvec3(4, 3, 2), uvec3(2, 3, 4), false))
1503			return ERROR;
1504		if (!RunIteration(uvec3(1, 1, 1), uvec3(1, 1, 1), true))
1505			return ERROR;
1506		if (!RunIteration(uvec3(1, 6, 1), uvec3(1, 1, 8), false))
1507			return ERROR;
1508		if (!RunIteration(uvec3(4, 1, 2), uvec3(10, 3, 4), true))
1509			return ERROR;
1510		return NO_ERROR;
1511	}
1512
1513	virtual long Cleanup()
1514	{
1515		glUseProgram(0);
1516		glDeleteProgram(m_program);
1517		glDeleteBuffers(2, m_counter_buffer);
1518		glDeleteBuffers(1, &m_dispatch_buffer);
1519		glDeleteBuffers(1, &m_storage_buffer);
1520		return NO_ERROR;
1521	}
1522};
1523
1524class BasicResourceSubroutine : public ComputeShaderBase
1525{
1526
1527	virtual std::string Title()
1528	{
1529		return "Compute Shader resources - Subroutines";
1530	}
1531
1532	virtual std::string Purpose()
1533	{
1534		return NL "1. Verify that subroutines work as expected in CS." NL
1535				  "2. Verify that subroutines array can be indexed with gl_WorkGroupID built-in variable." NL
1536				  "3. Verify that atomicCounterIncrement, imageLoad and texelFetch functions" NL
1537				  "    work as expected when called in CS from subroutine.";
1538	}
1539
1540	virtual std::string Method()
1541	{
1542		return NL "1. Create CS which uses array of subroutines." NL
1543				  "2. In CS index subroutine array with gl_WorkGroupID built-in variable." NL
1544				  "3. In each subroutine load data from SSBO0 and write it to SSBO1." NL
1545				  "3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1546				  "4. Verify SSBO1 content." NL "5. Repeat for different buffer and CS work sizes.";
1547	}
1548
1549	virtual std::string PassCriteria()
1550	{
1551		return "Everything works as expected.";
1552	}
1553
1554	GLuint m_program;
1555	GLuint m_atomic_counter_buffer;
1556	GLuint m_storage_buffer[2];
1557	GLuint m_buffer[2];
1558	GLuint m_texture_buffer[2];
1559
1560	virtual long Setup()
1561	{
1562		m_program				= 0;
1563		m_atomic_counter_buffer = 0;
1564		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
1565		memset(m_buffer, 0, sizeof(m_buffer));
1566		memset(m_texture_buffer, 0, sizeof(m_texture_buffer));
1567		return NO_ERROR;
1568	}
1569
1570	virtual long Run()
1571	{
1572		const char* const glsl_cs =
1573			NL "layout(local_size_x = 16) in;" NL "layout(binding = 1, std430) buffer Input {" NL "  uvec4 data[16];" NL
1574			   "} g_input;" NL "layout(std430, binding = 0) buffer Output {" NL "  uvec4 g_output[64];" NL "};" NL
1575			   "subroutine void ComputeType();" NL "subroutine uniform ComputeType Compute[4];" NL
1576			   "layout(binding = 0, offset = 0) uniform atomic_uint g_atomic_counter;" NL
1577			   "layout(rgba32ui) readonly uniform uimageBuffer g_image_buffer;" NL
1578			   "uniform usamplerBuffer g_sampler_buffer;" NL "subroutine(ComputeType)" NL "void Compute0() {" NL
1579			   "  const uint index = atomicCounterIncrement(g_atomic_counter);" NL
1580			   "  g_output[index] = uvec4(index);" NL "}" NL "subroutine(ComputeType)" NL "void Compute1() {" NL
1581			   "  g_output[gl_GlobalInvocationID.x] = g_input.data[gl_LocalInvocationIndex];" NL "}" NL
1582			   "subroutine(ComputeType)" NL "void Compute2() {" NL
1583			   "  g_output[gl_GlobalInvocationID.x] = imageLoad(g_image_buffer, int(gl_LocalInvocationIndex));" NL
1584			   "}" NL "subroutine(ComputeType)" NL "void Compute3() {" NL
1585			   "  g_output[gl_GlobalInvocationID.x] = texelFetch(g_sampler_buffer, int(gl_LocalInvocationIndex));" NL
1586			   "}" NL "void main() {" NL "  Compute[gl_WorkGroupID.x]();" NL "}";
1587		m_program = CreateComputeProgram(glsl_cs);
1588		glLinkProgram(m_program);
1589		if (!CheckProgram(m_program))
1590			return ERROR;
1591
1592		glGenBuffers(2, m_storage_buffer);
1593		/* output buffer */
1594		{
1595			std::vector<uvec4> data(64, uvec4(0xffff));
1596			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
1597			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * 64, &data[0], GL_DYNAMIC_DRAW);
1598		}
1599		/* input buffer */
1600		{
1601			std::vector<uvec4> data(16);
1602			for (GLuint i = 0; i < 16; ++i)
1603				data[i]   = uvec4(i + 16);
1604			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
1605			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_DYNAMIC_DRAW);
1606		}
1607		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1608
1609		glGenBuffers(1, &m_atomic_counter_buffer);
1610		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_counter_buffer);
1611		glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), NULL, GL_STREAM_DRAW);
1612		*static_cast<GLuint*>(glMapBuffer(GL_ATOMIC_COUNTER_BUFFER, GL_WRITE_ONLY)) = 0;
1613		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1614
1615		glGenBuffers(2, m_buffer);
1616		/* image buffer */
1617		{
1618			std::vector<uvec4> data(16);
1619			for (GLuint i = 0; i < 16; ++i)
1620				data[i]   = uvec4(i + 32);
1621			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[0]);
1622			glBufferData(GL_TEXTURE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_STATIC_DRAW);
1623		}
1624		/* texture buffer */
1625		{
1626			std::vector<uvec4> data(16);
1627			for (GLuint i = 0; i < 16; ++i)
1628				data[i]   = uvec4(i + 48);
1629			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[1]);
1630			glBufferData(GL_TEXTURE_BUFFER, sizeof(uvec4) * 16, &data[0], GL_STATIC_DRAW);
1631		}
1632		glBindBuffer(GL_TEXTURE_BUFFER, 0);
1633
1634		glGenTextures(2, m_texture_buffer);
1635		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[0]);
1636		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32UI, m_buffer[0]);
1637		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[1]);
1638		glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32UI, m_buffer[1]);
1639		glBindTexture(GL_TEXTURE_BUFFER, 0);
1640
1641		const GLuint index_compute0 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute0");
1642		const GLuint index_compute1 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute1");
1643		const GLuint index_compute2 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute2");
1644		const GLuint index_compute3 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Compute3");
1645		const GLint  loc_compute0   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[0]");
1646		const GLint  loc_compute1   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[1]");
1647		const GLint  loc_compute2   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[2]");
1648		const GLint  loc_compute3   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "Compute[3]");
1649
1650		// bind resources
1651		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
1652		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
1653		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_counter_buffer);
1654		glBindImageTexture(0, m_texture_buffer[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32UI);
1655		glActiveTexture(GL_TEXTURE0);
1656		glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer[1]);
1657
1658		glUseProgram(m_program);
1659
1660		// setup subroutines
1661		GLuint indices[4];
1662		indices[loc_compute0] = index_compute0;
1663		indices[loc_compute1] = index_compute1;
1664		indices[loc_compute2] = index_compute2;
1665		indices[loc_compute3] = index_compute3;
1666		glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 4, indices);
1667
1668		glDispatchCompute(4, 1, 1);
1669
1670		std::vector<uvec4> data(64);
1671		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
1672		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1673		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * 64, &data[0]);
1674
1675		for (GLuint i = 0; i < 64; ++i)
1676		{
1677			if (!IsEqual(data[i], uvec4(i)))
1678			{
1679				m_context.getTestContext().getLog()
1680					<< tcu::TestLog::Message << "Invalid value at index " << i << "." << tcu::TestLog::EndMessage;
1681				return ERROR;
1682			}
1683		}
1684
1685		GLuint value;
1686		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_atomic_counter_buffer);
1687		glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &value);
1688		if (value != 16)
1689		{
1690			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Final atomic counter value is " << value
1691												<< " should be 16." << tcu::TestLog::EndMessage;
1692			return ERROR;
1693		}
1694
1695		return NO_ERROR;
1696	}
1697
1698	virtual long Cleanup()
1699	{
1700		glUseProgram(0);
1701		glDeleteProgram(m_program);
1702		glDeleteBuffers(1, &m_atomic_counter_buffer);
1703		glDeleteBuffers(2, m_storage_buffer);
1704		glDeleteBuffers(2, m_buffer);
1705		glDeleteTextures(2, m_texture_buffer);
1706		return NO_ERROR;
1707	}
1708};
1709
1710class BasicResourceUniform : public ComputeShaderBase
1711{
1712
1713	virtual std::string Title()
1714	{
1715		return "Compute Shader resources - Uniforms";
1716	}
1717
1718	virtual std::string Purpose()
1719	{
1720		return NL "1. Verify that all types of uniform variables work as expected in CS." NL
1721				  "2. Verify that uniform variables can be updated with Uniform* and ProgramUniform* commands." NL
1722				  "3. Verify that re-linking CS program works as expected.";
1723	}
1724
1725	virtual std::string Method()
1726	{
1727		return NL "1. Create CS which uses all (single precision and integer) types of uniform variables." NL
1728				  "2. Update uniform variables with ProgramUniform* commands." NL
1729				  "3. Verify that uniform variables were updated correctly." NL "4. Re-link CS program." NL
1730				  "5. Update uniform variables with Uniform* commands." NL
1731				  "6. Verify that uniform variables were updated correctly.";
1732	}
1733
1734	virtual std::string PassCriteria()
1735	{
1736		return "Everything works as expected.";
1737	}
1738
1739	GLuint m_program;
1740	GLuint m_storage_buffer;
1741
1742	virtual long Setup()
1743	{
1744		m_program		 = 0;
1745		m_storage_buffer = 0;
1746		return NO_ERROR;
1747	}
1748
1749	virtual long Run()
1750	{
1751		const char* const glsl_cs = NL
1752			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform float g_0;" NL
1753			"uniform vec2 g_1;" NL "uniform vec3 g_2;" NL "uniform vec4 g_3;" NL "uniform mat2 g_4;" NL
1754			"uniform mat2x3 g_5;" NL "uniform mat2x4 g_6;" NL "uniform mat3x2 g_7;" NL "uniform mat3 g_8;" NL
1755			"uniform mat3x4 g_9;" NL "uniform mat4x2 g_10;" NL "uniform mat4x3 g_11;" NL "uniform mat4 g_12;" NL
1756			"uniform int g_13;" NL "uniform ivec2 g_14;" NL "uniform ivec3 g_15;" NL "uniform ivec4 g_16;" NL
1757			"uniform uint g_17;" NL "uniform uvec2 g_18;" NL "uniform uvec3 g_19;" NL "uniform uvec4 g_20;" NL NL
1758			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0) g_result = 0;" NL
1759			"  if (g_1 != vec2(2.0, 3.0)) g_result = 0;" NL "  if (g_2 != vec3(4.0, 5.0, 6.0)) g_result = 0;" NL
1760			"  if (g_3 != vec4(7.0, 8.0, 9.0, 10.0)) g_result = 0;" NL NL
1761			"  if (g_4 != mat2(11.0, 12.0, 13.0, 14.0)) g_result = 0;" NL
1762			"  if (g_5 != mat2x3(15.0, 16.0, 17.0, 18.0, 19.0, 20.0)) g_result = 0;" NL
1763			"  if (g_6 != mat2x4(21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0)) g_result = 0;" NL NL
1764			"  if (g_7 != mat3x2(29.0, 30.0, 31.0, 32.0, 33.0, 34.0)) g_result = 0;" NL
1765			"  if (g_8 != mat3(35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0)) g_result = 0;" NL
1766			"  if (g_9 != mat3x4(44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0)) g_result = "
1767			"0;" NL NL "  if (g_10 != mat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
1768			"  if (g_11 != mat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
1769			"0;" NL "  if (g_12 != mat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
1770			"88.0, 89.0, 90.0)) g_result = 0;" NL NL "  if (g_13 != 91) g_result = 0;" NL
1771			"  if (g_14 != ivec2(92, 93)) g_result = 0;" NL "  if (g_15 != ivec3(94, 95, 96)) g_result = 0;" NL
1772			"  if (g_16 != ivec4(97, 98, 99, 100)) g_result = 0;" NL NL "  if (g_17 != 101u) g_result = 0;" NL
1773			"  if (g_18 != uvec2(102u, 103u)) g_result = 0;" NL
1774			"  if (g_19 != uvec3(104u, 105u, 106u)) g_result = 0;" NL
1775			"  if (g_20 != uvec4(107u, 108u, 109u, 110u)) g_result = 0;" NL "}";
1776		m_program = CreateComputeProgram(glsl_cs);
1777		glLinkProgram(m_program);
1778		if (!CheckProgram(m_program))
1779			return ERROR;
1780
1781		glGenBuffers(1, &m_storage_buffer);
1782		/* create buffer */
1783		{
1784			const int data = 123;
1785			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1786			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
1787		}
1788
1789		glProgramUniform1f(m_program, glGetUniformLocation(m_program, "g_0"), 1.0f);
1790		glProgramUniform2f(m_program, glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
1791		glProgramUniform3f(m_program, glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
1792		glProgramUniform4f(m_program, glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
1793
1794		/* mat2 */
1795		{
1796			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
1797			glProgramUniformMatrix2fv(m_program, glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
1798		}
1799		/* mat2x3 */
1800		{
1801			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
1802			glProgramUniformMatrix2x3fv(m_program, glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
1803		}
1804		/* mat2x4 */
1805		{
1806			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
1807			glProgramUniformMatrix2x4fv(m_program, glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
1808		}
1809
1810		/* mat3x2 */
1811		{
1812			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
1813			glProgramUniformMatrix3x2fv(m_program, glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
1814		}
1815		/* mat3 */
1816		{
1817			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
1818			glProgramUniformMatrix3fv(m_program, glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
1819		}
1820		/* mat3x4 */
1821		{
1822			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
1823										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
1824			glProgramUniformMatrix3x4fv(m_program, glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
1825		}
1826
1827		/* mat4x2 */
1828		{
1829			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
1830			glProgramUniformMatrix4x2fv(m_program, glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
1831		}
1832		/* mat4x3 */
1833		{
1834			const GLfloat value[12] = {
1835				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
1836			};
1837			glProgramUniformMatrix4x3fv(m_program, glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
1838		}
1839		/* mat4 */
1840		{
1841			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
1842										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
1843			glProgramUniformMatrix4fv(m_program, glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
1844		}
1845
1846		glProgramUniform1i(m_program, glGetUniformLocation(m_program, "g_13"), 91);
1847		glProgramUniform2i(m_program, glGetUniformLocation(m_program, "g_14"), 92, 93);
1848		glProgramUniform3i(m_program, glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
1849		glProgramUniform4i(m_program, glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
1850
1851		glProgramUniform1ui(m_program, glGetUniformLocation(m_program, "g_17"), 101);
1852		glProgramUniform2ui(m_program, glGetUniformLocation(m_program, "g_18"), 102, 103);
1853		glProgramUniform3ui(m_program, glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
1854		glProgramUniform4ui(m_program, glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
1855
1856		glUseProgram(m_program);
1857		glDispatchCompute(1, 1, 1);
1858		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1859
1860		{
1861			int data;
1862			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
1863			if (data != 1)
1864			{
1865				m_context.getTestContext().getLog()
1866					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
1867				return ERROR;
1868			}
1869		}
1870
1871		// re-link program (all uniforms will be set to zero)
1872		glLinkProgram(m_program);
1873
1874		{
1875			const int data = 123;
1876			glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
1877		}
1878
1879		glUniform1f(glGetUniformLocation(m_program, "g_0"), 1.0f);
1880		glUniform2f(glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
1881		glUniform3f(glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
1882		glUniform4f(glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
1883
1884		/* mat2 */
1885		{
1886			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
1887			glUniformMatrix2fv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
1888		}
1889		/* mat2x3 */
1890		{
1891			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
1892			glUniformMatrix2x3fv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
1893		}
1894		/* mat2x4 */
1895		{
1896			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
1897			glUniformMatrix2x4fv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
1898		}
1899
1900		/* mat3x2 */
1901		{
1902			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
1903			glUniformMatrix3x2fv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
1904		}
1905		/* mat3 */
1906		{
1907			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
1908			glUniformMatrix3fv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
1909		}
1910		/* mat3x4 */
1911		{
1912			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
1913										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
1914			glUniformMatrix3x4fv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
1915		}
1916
1917		/* mat4x2 */
1918		{
1919			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
1920			glUniformMatrix4x2fv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
1921		}
1922		/* mat4x3 */
1923		{
1924			const GLfloat value[12] = {
1925				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
1926			};
1927			glUniformMatrix4x3fv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
1928		}
1929		/* mat4 */
1930		{
1931			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
1932										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
1933			glUniformMatrix4fv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
1934		}
1935
1936		glUniform1i(glGetUniformLocation(m_program, "g_13"), 91);
1937		glUniform2i(glGetUniformLocation(m_program, "g_14"), 92, 93);
1938		glUniform3i(glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
1939		glUniform4i(glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
1940
1941		glUniform1ui(glGetUniformLocation(m_program, "g_17"), 101);
1942		glUniform2ui(glGetUniformLocation(m_program, "g_18"), 102, 103);
1943		glUniform3ui(glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
1944		glUniform4ui(glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
1945
1946		glDispatchCompute(1, 1, 1);
1947		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1948
1949		/* validate */
1950		{
1951			int data;
1952			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
1953			if (data != 1)
1954			{
1955				m_context.getTestContext().getLog()
1956					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
1957				return ERROR;
1958			}
1959		}
1960
1961		return NO_ERROR;
1962	}
1963
1964	virtual long Cleanup()
1965	{
1966		glUseProgram(0);
1967		glDeleteProgram(m_program);
1968		glDeleteBuffers(1, &m_storage_buffer);
1969		return NO_ERROR;
1970	}
1971};
1972
1973class BasicBuiltinVariables : public ComputeShaderBase
1974{
1975
1976	virtual std::string Title()
1977	{
1978		return "CS built-in variables";
1979	}
1980
1981	virtual std::string Purpose()
1982	{
1983		return NL "Verify that all (gl_WorkGroupSize, gl_WorkGroupID, gl_LocalInvocationID," NL
1984				  "gl_GlobalInvocationID, gl_NumWorkGroups, gl_WorkGroupSize)" NL
1985				  "CS built-in variables has correct values.";
1986	}
1987
1988	virtual std::string Method()
1989	{
1990		return NL "1. Create CS which writes all built-in variables to SSBO." NL
1991				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1992				  "3. Verify SSBO content." NL "4. Repeat for several different local and global work sizes.";
1993	}
1994
1995	virtual std::string PassCriteria()
1996	{
1997		return "Everything works as expected.";
1998	}
1999
2000	GLuint m_program;
2001	GLuint m_storage_buffer;
2002	GLuint m_dispatch_buffer;
2003
2004	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
2005	{
2006		const uvec3		  global_size = local_size * num_groups;
2007		std::stringstream ss;
2008		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
2009		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
2010		   << ", " << global_size.y() << ", " << global_size.z()
2011		   << ");" NL "layout(std430) buffer OutputBuffer {" NL "  uvec4 num_work_groups["
2012		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_size["
2013		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_id["
2014		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_id["
2015		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 global_invocation_id["
2016		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_index["
2017		   << global_size.x() * global_size.y() * global_size.z()
2018		   << "];" NL "} g_out_buffer;" NL "void main() {" NL
2019			  "  if ((gl_WorkGroupSize * gl_WorkGroupID + gl_LocalInvocationID) != gl_GlobalInvocationID) return;" NL
2020			  "  const uint global_index = gl_GlobalInvocationID.x +" NL
2021			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
2022			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
2023			  "  g_out_buffer.num_work_groups[global_index] = uvec4(gl_NumWorkGroups, 0);" NL
2024			  "  g_out_buffer.work_group_size[global_index] = uvec4(gl_WorkGroupSize, 0);" NL
2025			  "  g_out_buffer.work_group_id[global_index] = uvec4(gl_WorkGroupID, 0);" NL
2026			  "  g_out_buffer.local_invocation_id[global_index] = uvec4(gl_LocalInvocationID, 0);" NL
2027			  "  g_out_buffer.global_invocation_id[global_index] = uvec4(gl_GlobalInvocationID, 0);" NL
2028			  "  g_out_buffer.local_invocation_index[global_index] = uvec4(gl_LocalInvocationIndex);" NL "}";
2029		return ss.str();
2030	}
2031
2032	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
2033	{
2034		if (m_program != 0)
2035			glDeleteProgram(m_program);
2036		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
2037		glLinkProgram(m_program);
2038		if (!CheckProgram(m_program))
2039			return false;
2040
2041		const GLuint kBufferSize =
2042			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
2043
2044		std::vector<uvec4> data(kBufferSize * 6);
2045		if (m_storage_buffer == 0)
2046			glGenBuffers(1, &m_storage_buffer);
2047		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2048		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kBufferSize * 6, &data[0], GL_DYNAMIC_DRAW);
2049		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2050
2051		glUseProgram(m_program);
2052		if (dispatch_indirect)
2053		{
2054			if (m_dispatch_buffer == 0)
2055				glGenBuffers(1, &m_dispatch_buffer);
2056			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2057			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
2058			glDispatchComputeIndirect(0);
2059		}
2060		else
2061		{
2062			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
2063		}
2064
2065		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2066		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2067		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * kBufferSize * 6, &data[0]);
2068
2069		// gl_NumWorkGroups
2070		for (GLuint index = 0; index < kBufferSize; ++index)
2071		{
2072			if (!IsEqual(data[index], uvec4(num_groups.x(), num_groups.y(), num_groups.z(), 0)))
2073			{
2074				m_context.getTestContext().getLog()
2075					<< tcu::TestLog::Message << "gl_NumWorkGroups: Invalid data at index " << index << "."
2076					<< tcu::TestLog::EndMessage;
2077				return false;
2078			}
2079		}
2080		// gl_WorkGroupSize
2081		for (GLuint index = kBufferSize; index < 2 * kBufferSize; ++index)
2082		{
2083			if (!IsEqual(data[index], uvec4(local_size.x(), local_size.y(), local_size.z(), 0)))
2084			{
2085				m_context.getTestContext().getLog()
2086					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index << "."
2087					<< tcu::TestLog::EndMessage;
2088				return false;
2089			}
2090		}
2091		// gl_WorkGroupID
2092		for (GLuint index = 2 * kBufferSize; index < 3 * kBufferSize; ++index)
2093		{
2094			uvec3 expected = IndexTo3DCoord(index - 2 * kBufferSize, local_size.x() * num_groups.x(),
2095											local_size.y() * num_groups.y());
2096			expected.x() /= local_size.x();
2097			expected.y() /= local_size.y();
2098			expected.z() /= local_size.z();
2099			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2100			{
2101				m_context.getTestContext().getLog() << tcu::TestLog::Message << "gl_WorkGroupID: Invalid data at index "
2102													<< index << "." << tcu::TestLog::EndMessage;
2103				return false;
2104			}
2105		}
2106		// gl_LocalInvocationID
2107		for (GLuint index = 3 * kBufferSize; index < 4 * kBufferSize; ++index)
2108		{
2109			uvec3 expected = IndexTo3DCoord(index - 3 * kBufferSize, local_size.x() * num_groups.x(),
2110											local_size.y() * num_groups.y());
2111			expected.x() %= local_size.x();
2112			expected.y() %= local_size.y();
2113			expected.z() %= local_size.z();
2114			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2115			{
2116				m_context.getTestContext().getLog()
2117					<< tcu::TestLog::Message << "gl_LocalInvocationID: Invalid data at index " << index << "."
2118					<< tcu::TestLog::EndMessage;
2119				return false;
2120			}
2121		}
2122		// gl_GlobalInvocationID
2123		for (GLuint index = 4 * kBufferSize; index < 5 * kBufferSize; ++index)
2124		{
2125			uvec3 expected = IndexTo3DCoord(index - 4 * kBufferSize, local_size.x() * num_groups.x(),
2126											local_size.y() * num_groups.y());
2127			if (!IsEqual(data[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2128			{
2129				m_context.getTestContext().getLog()
2130					<< tcu::TestLog::Message << "gl_GlobalInvocationID: Invalid data at index " << index << "."
2131					<< tcu::TestLog::EndMessage;
2132				return false;
2133			}
2134		}
2135		// gl_LocalInvocationIndex
2136		for (GLuint index = 5 * kBufferSize; index < 6 * kBufferSize; ++index)
2137		{
2138			uvec3 coord = IndexTo3DCoord(index - 5 * kBufferSize, local_size.x() * num_groups.x(),
2139										 local_size.y() * num_groups.y());
2140			const GLuint expected = (coord.x() % local_size.x()) + (coord.y() % local_size.y()) * local_size.x() +
2141									(coord.z() % local_size.z()) * local_size.x() * local_size.y();
2142			if (!IsEqual(data[index], uvec4(expected)))
2143			{
2144				m_context.getTestContext().getLog()
2145					<< tcu::TestLog::Message << "gl_LocalInvocationIndex: Invalid data at index " << index << "."
2146					<< tcu::TestLog::EndMessage;
2147				return false;
2148			}
2149		}
2150		return true;
2151	}
2152
2153	virtual long Setup()
2154	{
2155		m_program		  = 0;
2156		m_storage_buffer  = 0;
2157		m_dispatch_buffer = 0;
2158		return NO_ERROR;
2159	}
2160
2161	virtual long Run()
2162	{
2163		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
2164			return ERROR;
2165		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
2166			return ERROR;
2167		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
2168			return ERROR;
2169		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
2170			return ERROR;
2171		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
2172			return ERROR;
2173		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
2174			return ERROR;
2175		return NO_ERROR;
2176	}
2177
2178	virtual long Cleanup()
2179	{
2180		glUseProgram(0);
2181		glDeleteProgram(m_program);
2182		glDeleteBuffers(1, &m_storage_buffer);
2183		glDeleteBuffers(1, &m_dispatch_buffer);
2184		return NO_ERROR;
2185	}
2186};
2187
2188class BasicMax : public ComputeShaderBase
2189{
2190
2191	virtual std::string Title()
2192	{
2193		return NL "CS max values";
2194	}
2195
2196	virtual std::string Purpose()
2197	{
2198		return NL "Verify (on the API and GLSL side) that all GL_MAX_COMPUTE_* values are not less than" NL
2199				  "required by the OpenGL specification.";
2200	}
2201
2202	virtual std::string Method()
2203	{
2204		return NL "1. Use all API commands to query all GL_MAX_COMPUTE_* values. Verify that they are correct." NL
2205				  "2. Verify all gl_MaxCompute* constants in the GLSL.";
2206	}
2207
2208	virtual std::string PassCriteria()
2209	{
2210		return NL "Everything works as expected.";
2211	}
2212
2213	GLuint m_program;
2214	GLuint m_buffer;
2215
2216	bool CheckIndexed(GLenum target, const GLint* min_values)
2217	{
2218		GLint	 i;
2219		GLint64   i64;
2220		GLfloat   f;
2221		GLdouble  d;
2222		GLboolean b;
2223
2224		for (GLuint c = 0; c < 3; c++)
2225		{
2226			glGetIntegeri_v(target, c, &i);
2227			if (i < min_values[c])
2228			{
2229				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
2230													<< min_values[c] << "." << tcu::TestLog::EndMessage;
2231				return false;
2232			}
2233		}
2234		for (GLuint c = 0; c < 3; c++)
2235		{
2236			glGetInteger64i_v(target, c, &i64);
2237			if (i64 < static_cast<GLint64>(min_values[c]))
2238			{
2239				m_context.getTestContext().getLog()
2240					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least "
2241					<< min_values[c] << "." << tcu::TestLog::EndMessage;
2242				return false;
2243			}
2244		}
2245		for (GLuint c = 0; c < 3; c++)
2246		{
2247			glGetFloati_v(target, c, &f);
2248			if (f < static_cast<GLfloat>(min_values[c]))
2249			{
2250				m_context.getTestContext().getLog()
2251					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(f) << " should be at least "
2252					<< min_values[c] << "." << tcu::TestLog::EndMessage;
2253				return false;
2254			}
2255		}
2256		for (GLuint c = 0; c < 3; c++)
2257		{
2258			glGetDoublei_v(target, c, &d);
2259			if (d < static_cast<GLdouble>(min_values[c]))
2260			{
2261				m_context.getTestContext().getLog()
2262					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(d) << " should be at least "
2263					<< min_values[c] << "." << tcu::TestLog::EndMessage;
2264				return false;
2265			}
2266		}
2267		for (GLuint c = 0; c < 3; c++)
2268		{
2269			glGetBooleani_v(target, c, &b);
2270			if (b == GL_FALSE)
2271			{
2272				m_context.getTestContext().getLog()
2273					<< tcu::TestLog::Message << "Is GL_FALSE should be at least GL_TRUE." << tcu::TestLog::EndMessage;
2274				return false;
2275			}
2276		}
2277
2278		return true;
2279	}
2280
2281	bool Check(GLenum target, const GLint min_value)
2282	{
2283		GLint	 i;
2284		GLint64   i64;
2285		GLfloat   f;
2286		GLdouble  d;
2287		GLboolean b;
2288
2289		glGetIntegerv(target, &i);
2290		if (i < min_value)
2291		{
2292			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
2293												<< min_value << "." << tcu::TestLog::EndMessage;
2294			return false;
2295		}
2296		glGetInteger64v(target, &i64);
2297		if (static_cast<GLint>(i64) < min_value)
2298		{
2299			m_context.getTestContext().getLog()
2300				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least " << min_value
2301				<< "." << tcu::TestLog::EndMessage;
2302			return false;
2303		}
2304		glGetFloatv(target, &f);
2305		if (f < static_cast<GLfloat>(min_value))
2306		{
2307			m_context.getTestContext().getLog()
2308				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(f) << " should be at least " << min_value << "."
2309				<< tcu::TestLog::EndMessage;
2310			return false;
2311		}
2312		glGetDoublev(target, &d);
2313		if (d < static_cast<GLdouble>(min_value))
2314		{
2315			m_context.getTestContext().getLog()
2316				<< tcu::TestLog::Message << "Is " << static_cast<GLint>(d) << " should be at least " << min_value << "."
2317				<< tcu::TestLog::EndMessage;
2318			return false;
2319		}
2320		glGetBooleanv(target, &b);
2321		if (b != (min_value ? GL_TRUE : GL_FALSE))
2322		{
2323			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << b << " should be "
2324												<< (min_value ? GL_TRUE : GL_FALSE) << "." << tcu::TestLog::EndMessage;
2325			return false;
2326		}
2327
2328		return true;
2329	}
2330
2331	virtual long Setup()
2332	{
2333		m_program = 0;
2334		m_buffer  = 0;
2335		return NO_ERROR;
2336	}
2337
2338	virtual long Run()
2339	{
2340		const GLint work_group_count[3] = { 65535, 65535, 65535 };
2341		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_COUNT, work_group_count))
2342			return ERROR;
2343
2344		const GLint work_group_size[3] = { 1024, 1024, 64 };
2345		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_SIZE, work_group_size))
2346			return ERROR;
2347
2348		if (!Check(GL_MAX_COMPUTE_UNIFORM_BLOCKS, 12))
2349			return ERROR;
2350		if (!Check(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, 16))
2351			return ERROR;
2352		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, 8))
2353			return ERROR;
2354		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTERS, 8))
2355			return ERROR;
2356		if (!Check(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, 32768))
2357			return ERROR;
2358
2359		if (glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::core(4, 5)))
2360		{
2361			if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 1024))
2362				return ERROR;
2363		}
2364		else
2365		{
2366			if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 512))
2367				return ERROR;
2368		}
2369
2370		if (!Check(GL_MAX_COMPUTE_IMAGE_UNIFORMS, 8))
2371			return ERROR;
2372		if (!Check(GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS, 512))
2373			return ERROR;
2374
2375		const char* const glsl_cs =
2376			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
2377			   "uniform ivec3 MaxComputeWorkGroupCount;" NL "uniform ivec3 MaxComputeWorkGroupSize;" NL
2378			   "uniform int MaxComputeUniformComponents;" NL "uniform int MaxComputeTextureImageUnits;" NL
2379			   "uniform int MaxComputeImageUniforms;" NL "uniform int MaxComputeAtomicCounters;" NL
2380			   "uniform int MaxComputeAtomicCounterBuffers;" NL "void main() {" NL "  g_output = 1;" NL
2381			   "  if (MaxComputeWorkGroupCount != gl_MaxComputeWorkGroupCount) g_output = 0;" NL
2382			   "  if (MaxComputeWorkGroupSize != gl_MaxComputeWorkGroupSize) g_output = 0;" NL
2383			   "  if (MaxComputeUniformComponents != gl_MaxComputeUniformComponents) g_output = 0;" NL
2384			   "  if (MaxComputeTextureImageUnits != gl_MaxComputeTextureImageUnits) g_output = 0;" NL
2385			   "  if (MaxComputeImageUniforms != gl_MaxComputeImageUniforms) g_output = 0;" NL
2386			   "  if (MaxComputeAtomicCounters != gl_MaxComputeAtomicCounters) g_output = 0;" NL
2387			   "  if (MaxComputeAtomicCounterBuffers != gl_MaxComputeAtomicCounterBuffers) g_output = 0;" NL "}";
2388		m_program = CreateComputeProgram(glsl_cs);
2389		glLinkProgram(m_program);
2390		if (!CheckProgram(m_program))
2391			return ERROR;
2392		glUseProgram(m_program);
2393
2394		GLint p[3];
2395		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &p[0]);
2396		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &p[1]);
2397		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &p[2]);
2398		glUniform3i(glGetUniformLocation(m_program, "MaxComputeWorkGroupCount"), p[0], p[1], p[2]);
2399
2400		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &p[0]);
2401		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &p[1]);
2402		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &p[2]);
2403		glUniform3iv(glGetUniformLocation(m_program, "MaxComputeWorkGroupSize"), 1, p);
2404
2405		glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, p);
2406		glUniform1i(glGetUniformLocation(m_program, "MaxComputeUniformComponents"), p[0]);
2407
2408		glGetIntegerv(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, p);
2409		glUniform1iv(glGetUniformLocation(m_program, "MaxComputeTextureImageUnits"), 1, p);
2410
2411		glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, p);
2412		glUniform1i(glGetUniformLocation(m_program, "MaxComputeImageUniforms"), p[0]);
2413
2414		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, p);
2415		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounters"), p[0]);
2416
2417		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, p);
2418		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounterBuffers"), p[0]);
2419
2420		GLint data = 0xffff;
2421		glGenBuffers(1, &m_buffer);
2422		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
2423		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint), &data, GL_DYNAMIC_DRAW);
2424
2425		glDispatchCompute(1, 1, 1);
2426
2427		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2428		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), &data);
2429
2430		return data == 1 ? NO_ERROR : ERROR;
2431	}
2432	virtual long Cleanup()
2433	{
2434		glUseProgram(0);
2435		glDeleteProgram(m_program);
2436		glDeleteBuffers(1, &m_buffer);
2437		return NO_ERROR;
2438	}
2439};
2440
2441class BasicBuildMonolithic : public ComputeShaderBase
2442{
2443
2444	virtual std::string Title()
2445	{
2446		return "Building CS monolithic program";
2447	}
2448
2449	virtual std::string Purpose()
2450	{
2451		return NL "1. Verify that building monolithic CS program works as expected." NL
2452				  "2. Verify that program consisting from 3 compilation units links as expected." NL
2453				  "3. Verify that CS consisting from 2 strings compiles as expected.";
2454	}
2455
2456	virtual std::string Method()
2457	{
2458		return NL "1. Create, compile and link CS using CreateShader, CompileShader and LinkProgram commands." NL
2459				  "2. Dispatch and verify CS program.";
2460	}
2461
2462	virtual std::string PassCriteria()
2463	{
2464		return "Everything works as expected.";
2465	}
2466
2467	virtual long Run()
2468	{
2469		const char* const cs1[2] = { "#version 430 core",
2470
2471									 NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL
2472										"  Run();" NL "}" };
2473
2474		const char* const cs2 =
2475			"#version 430 core" NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2476			"vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}";
2477
2478		const char* const cs3 =
2479			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 0, std430) buffer Output {" NL
2480			"  vec4 g_output;" NL "};" NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL
2481			"  return vec4(1, 2, 3, 4);" NL "}";
2482
2483		const GLuint sh1 = glCreateShader(GL_COMPUTE_SHADER);
2484
2485		GLint type;
2486		glGetShaderiv(sh1, GL_SHADER_TYPE, &type);
2487		if (static_cast<GLenum>(type) != GL_COMPUTE_SHADER)
2488		{
2489			m_context.getTestContext().getLog()
2490				<< tcu::TestLog::Message << "SHADER_TYPE should be COMPUTE_SHADER." << tcu::TestLog::EndMessage;
2491			glDeleteShader(sh1);
2492			return false;
2493		}
2494
2495		glShaderSource(sh1, 2, cs1, NULL);
2496		glCompileShader(sh1);
2497
2498		const GLuint sh2 = glCreateShader(GL_COMPUTE_SHADER);
2499		glShaderSource(sh2, 1, &cs2, NULL);
2500		glCompileShader(sh2);
2501
2502		const GLuint sh3 = glCreateShader(GL_COMPUTE_SHADER);
2503		glShaderSource(sh3, 1, &cs3, NULL);
2504		glCompileShader(sh3);
2505
2506		const GLuint p = glCreateProgram();
2507		glAttachShader(p, sh1);
2508		glAttachShader(p, sh2);
2509		glAttachShader(p, sh3);
2510		glLinkProgram(p);
2511
2512		glDeleteShader(sh1);
2513		glDeleteShader(sh2);
2514		glDeleteShader(sh3);
2515
2516		bool res = CheckProgram(p);
2517
2518		GLuint buffer;
2519		glGenBuffers(1, &buffer);
2520		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
2521		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
2522		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2523
2524		glUseProgram(p);
2525		glDispatchCompute(1, 1, 1);
2526
2527		vec4 data;
2528		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2529		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2530		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
2531		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2532		{
2533			m_context.getTestContext().getLog()
2534				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2535			res = false;
2536		}
2537
2538		glDeleteBuffers(1, &buffer);
2539		glUseProgram(0);
2540		glDeleteProgram(p);
2541
2542		return res == true ? NO_ERROR : ERROR;
2543	}
2544};
2545
2546class BasicBuildSeparable : public ComputeShaderBase
2547{
2548
2549	virtual std::string Title()
2550	{
2551		return "Building CS separable program";
2552	}
2553
2554	virtual std::string Purpose()
2555	{
2556		return NL "1. Verify that building separable CS program works as expected." NL
2557				  "2. Verify that program consisting from 4 strings works as expected.";
2558	}
2559
2560	virtual std::string Method()
2561	{
2562		return NL "1. Create, compile and link CS using CreateShaderProgramv command." NL
2563				  "2. Dispatch and verify CS program.";
2564	}
2565
2566	virtual std::string PassCriteria()
2567	{
2568		return "Everything works as expected.";
2569	}
2570
2571	virtual long Run()
2572	{
2573		const char* const cs[4] = {
2574			"#version 430 core",
2575
2576			NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
2577			NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2578			   "vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}",
2579			NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL "  return vec4(1, 2, 3, 4);" NL "}"
2580		};
2581
2582		const GLuint p   = glCreateShaderProgramv(GL_COMPUTE_SHADER, 4, cs);
2583		bool		 res = CheckProgram(p);
2584
2585		GLuint buffer;
2586		glGenBuffers(1, &buffer);
2587		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
2588		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
2589		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2590
2591		glUseProgram(p);
2592		glDispatchCompute(1, 1, 1);
2593
2594		vec4 data;
2595		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2596		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2597		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
2598		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2599		{
2600			m_context.getTestContext().getLog()
2601				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2602			res = false;
2603		}
2604
2605		glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &vec4(0.0f)[0]);
2606
2607		GLuint pipeline;
2608		glGenProgramPipelines(1, &pipeline);
2609		glUseProgramStages(pipeline, GL_COMPUTE_SHADER_BIT, p);
2610
2611		glUseProgram(0);
2612		glBindProgramPipeline(pipeline);
2613		glDispatchCompute(1, 1, 1);
2614
2615		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2616		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &data[0]);
2617		if (!IsEqual(data, vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2618		{
2619			m_context.getTestContext().getLog()
2620				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2621			res = false;
2622		}
2623
2624		glDeleteProgramPipelines(1, &pipeline);
2625		glDeleteBuffers(1, &buffer);
2626		glDeleteProgram(p);
2627
2628		return res == true ? NO_ERROR : ERROR;
2629	}
2630};
2631
2632class BasicSharedSimple : public ComputeShaderBase
2633{
2634	virtual std::string Title()
2635	{
2636		return "Shared Memory - simple usage";
2637	}
2638
2639	virtual std::string Purpose()
2640	{
2641		return NL "1. Verify that shared array of uints works as expected." NL
2642				  "2. Verify that shared memory written by one invocation is observable by other invocations" NL
2643				  "    when groupMemoryBarrier() and barrier() built-in functions are used.";
2644	}
2645
2646	virtual std::string Method()
2647	{
2648		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2649				  "2. Verify results written by CS to SSBO." NL
2650				  "3. Repeat for several different number of work groups.";
2651	}
2652
2653	virtual std::string PassCriteria()
2654	{
2655		return "Everything works as expected.";
2656	}
2657
2658	GLuint m_program;
2659	GLuint m_storage_buffer;
2660	GLuint m_dispatch_buffer;
2661
2662	bool RunIteration(const GLuint num_groups, bool dispatch_indirect)
2663	{
2664		const GLuint kBufferSize = 256 * num_groups;
2665
2666		std::vector<GLuint> data(kBufferSize, 0xffff);
2667		if (m_storage_buffer == 0)
2668			glGenBuffers(1, &m_storage_buffer);
2669		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2670		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2671		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2672
2673		glUseProgram(m_program);
2674		if (dispatch_indirect)
2675		{
2676			const GLuint groups[3] = { num_groups, 1, 1 };
2677			if (m_dispatch_buffer == 0)
2678				glGenBuffers(1, &m_dispatch_buffer);
2679			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2680			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2681			glDispatchComputeIndirect(0);
2682		}
2683		else
2684		{
2685			glDispatchCompute(num_groups, 1, 1);
2686		}
2687
2688		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2689		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2690		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &data[0]);
2691		for (GLuint i = 0; i < kBufferSize; ++i)
2692		{
2693			if (data[i] != 1)
2694			{
2695				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2696													<< data[i] << " should be 1." << tcu::TestLog::EndMessage;
2697				return false;
2698			}
2699		}
2700		return true;
2701	}
2702
2703	virtual long Setup()
2704	{
2705		m_program		  = 0;
2706		m_storage_buffer  = 0;
2707		m_dispatch_buffer = 0;
2708		return NO_ERROR;
2709	}
2710
2711	virtual long Run()
2712	{
2713		const char* const glsl_cs =
2714			NL "layout(local_size_x = 256) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2715			   "shared uint g_shared_data[256];" NL "void main() {" NL
2716			   "  g_shared_data[gl_LocalInvocationID.x] = gl_LocalInvocationIndex;" NL
2717			   "  groupMemoryBarrier();" // flush memory stores
2718			NL "  barrier();"			 // wait for all stores to finish
2719			NL "  g_output[gl_GlobalInvocationID.x] = 1;" NL "  if (gl_LocalInvocationIndex < 255) {" NL
2720			   "    const uint res = g_shared_data[gl_LocalInvocationID.x + "
2721			   "1];" // load data from shared memory filled by other thread
2722			NL "    if (res != (gl_LocalInvocationIndex + 1)) {" NL "      g_output[gl_GlobalInvocationID.x] = 0;" NL
2723			   "    }" NL "  }" NL "}";
2724		m_program = CreateComputeProgram(glsl_cs);
2725		glLinkProgram(m_program);
2726		if (!CheckProgram(m_program))
2727			return ERROR;
2728
2729		if (!RunIteration(1, false))
2730			return ERROR;
2731		if (!RunIteration(8, true))
2732			return ERROR;
2733		if (!RunIteration(13, false))
2734			return ERROR;
2735		if (!RunIteration(7, true))
2736			return ERROR;
2737		return NO_ERROR;
2738	}
2739	virtual long Cleanup()
2740	{
2741		glUseProgram(0);
2742		glDeleteProgram(m_program);
2743		glDeleteBuffers(1, &m_storage_buffer);
2744		glDeleteBuffers(1, &m_dispatch_buffer);
2745		return NO_ERROR;
2746	}
2747};
2748
2749class BasicSharedStruct : public ComputeShaderBase
2750{
2751	virtual std::string Title()
2752	{
2753		return "Shared Memory - arrays and structers";
2754	}
2755
2756	virtual std::string Purpose()
2757	{
2758		return NL "1. Verify that vectors, matrices, structers and arrays of those can be used" NL
2759				  "    as a shared memory." NL
2760				  "2. Verify that shared memory can be indexed with constant values, built-in" NL
2761				  "    variables and dynamic expressions." NL
2762				  "3. Verify that memoryBarrierAtomicCounter(), memoryBarrierImage(), memoryBarrier()," NL
2763				  "     memoryBarrierBuffer() and memoryBarrierShared() built-in functions are accepted" NL
2764				  "     by the GLSL compiler.";
2765	}
2766
2767	virtual std::string Method()
2768	{
2769		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2770				  "2. Verify results written by CS to SSBO.";
2771	}
2772
2773	virtual std::string PassCriteria()
2774	{
2775		return "Everything works as expected.";
2776	}
2777
2778	GLuint m_program;
2779	GLuint m_storage_buffer;
2780	GLuint m_dispatch_buffer;
2781
2782	bool RunIteration(bool dispatch_indirect)
2783	{
2784		const GLuint kBufferSize = 256;
2785
2786		std::vector<vec4> data(kBufferSize);
2787		if (m_storage_buffer == 0)
2788			glGenBuffers(1, &m_storage_buffer);
2789		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2790		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2791		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2792
2793		glUseProgram(m_program);
2794		if (dispatch_indirect)
2795		{
2796			const GLuint groups[3] = { 1, 1, 1 };
2797			if (m_dispatch_buffer == 0)
2798				glGenBuffers(1, &m_dispatch_buffer);
2799			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2800			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2801			glDispatchComputeIndirect(0);
2802		}
2803		else
2804		{
2805			glDispatchCompute(1, 1, 1);
2806		}
2807
2808		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2809		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2810		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize, &data[0]);
2811		for (GLuint i = 0; i < kBufferSize; ++i)
2812		{
2813			if (!IsEqual(data[i], vec4(static_cast<float>(i))))
2814			{
2815				m_context.getTestContext().getLog()
2816					<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
2817				return false;
2818			}
2819		}
2820		return true;
2821	}
2822
2823	virtual long Setup()
2824	{
2825		m_program		  = 0;
2826		m_storage_buffer  = 0;
2827		m_dispatch_buffer = 0;
2828		return NO_ERROR;
2829	}
2830
2831	virtual long Run()
2832	{
2833		const char* const glsl_cs = NL
2834			"layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[256];" NL "};" NL
2835			"struct SubData {" NL "  mat2x4 data;" NL "};" NL "struct Data {" NL "  uint index;" NL "  vec3 data0;" NL
2836			"  SubData data1;" NL "};" NL "shared Data g_shared_data[256];" NL "shared int g_shared_buf[2];" NL
2837			"void main() {" NL "  if (gl_LocalInvocationID.x == 0) {" NL "    g_shared_buf[1] = 1;" NL
2838			"    g_shared_buf[1 + gl_LocalInvocationID.x] = 0;" NL "    g_shared_buf[0] = 128;" NL
2839			"    g_output[0] = vec4(g_shared_buf[1]);" NL "    g_output[128] = vec4(g_shared_buf[0]);" NL
2840			"    memoryBarrierBuffer();" // note: this call is not needed here, just check if compiler accepts it
2841			NL "  } else {" NL "    const uint index = gl_LocalInvocationIndex;" NL
2842			"    g_shared_data[index].index = index;" NL "    g_shared_data[index + 128].index = index + 128;" NL
2843			"    g_shared_data[index].data1.data = mat2x4(0.0);" NL
2844			"    g_shared_data[index + 128].data1.data = mat2x4(0.0);" NL
2845			"    g_output[index] = vec4(g_shared_data[index].index);" // load data from shared memory
2846			NL "    g_output[index + 128] = vec4(g_shared_data[index + 128].index);" NL
2847			"    memoryBarrierShared();" // note: this call is not needed here, just check if compiler accepts it
2848			NL "  }" NL "  memoryBarrierAtomicCounter();" NL "  memoryBarrierImage();" NL
2849			"  memoryBarrier();" // note: these calls are not needed here, just check if compiler accepts them
2850			NL "}";
2851		m_program = CreateComputeProgram(glsl_cs);
2852		glLinkProgram(m_program);
2853		if (!CheckProgram(m_program))
2854			return ERROR;
2855
2856		if (!RunIteration(false))
2857			return ERROR;
2858		if (!RunIteration(true))
2859			return ERROR;
2860		return NO_ERROR;
2861	}
2862
2863	virtual long Cleanup()
2864	{
2865		glUseProgram(0);
2866		glDeleteProgram(m_program);
2867		glDeleteBuffers(1, &m_storage_buffer);
2868		glDeleteBuffers(1, &m_dispatch_buffer);
2869		return NO_ERROR;
2870	}
2871};
2872
2873class BasicDispatchIndirect : public ComputeShaderBase
2874{
2875	virtual std::string Title()
2876	{
2877		return NL "DispatchComputeIndirect command";
2878	}
2879
2880	virtual std::string Purpose()
2881	{
2882		return NL
2883			"1. Verify that DispatchComputeIndirect command works as described in the OpenGL specification." NL
2884			"2. Verify that <offset> parameter is correctly applied." NL
2885			"3. Verify that updating dispatch buffer with different methods (BufferData, BufferSubData, MapBuffer)" NL
2886			"    just before DispatchComputeIndirect call works as expected." NL
2887			"4. Verify that GL_DISPATCH_INDIRECT_BUFFER_BINDING binding point is set correctly.";
2888	}
2889
2890	virtual std::string Method()
2891	{
2892		return NL
2893			"1. Create CS and dispatch indirect buffer." NL "2. Dispatch CS with DispatchComputeIndirect command." NL
2894			"3. Update dispatch indirect buffer." NL
2895			"4. Repeat several times updating dispatch buffer with different methods and changing <offset> parameter.";
2896	}
2897
2898	virtual std::string PassCriteria()
2899	{
2900		return NL "Everything works as expected.";
2901	}
2902
2903	GLuint m_program;
2904	GLuint m_storage_buffer;
2905	GLuint m_dispatch_buffer[2];
2906
2907	bool RunIteration(GLintptr offset, GLuint buffer_size)
2908	{
2909		std::vector<GLuint> data(buffer_size);
2910		if (m_storage_buffer == 0)
2911			glGenBuffers(1, &m_storage_buffer);
2912		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2913		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * buffer_size, &data[0], GL_DYNAMIC_DRAW);
2914
2915		glDispatchComputeIndirect(offset);
2916
2917		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2918		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * buffer_size, &data[0]);
2919		for (GLuint i = 0; i < buffer_size; ++i)
2920		{
2921			if (data[i] != i)
2922			{
2923				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2924													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
2925				return false;
2926			}
2927		}
2928		return true;
2929	}
2930
2931	bool CheckBinding(GLuint expected)
2932	{
2933		GLint	 i;
2934		GLint64   i64;
2935		GLfloat   f;
2936		GLdouble  d;
2937		GLboolean b;
2938
2939		glGetIntegerv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i);
2940		if (static_cast<GLuint>(i) != expected)
2941		{
2942			return false;
2943		}
2944		glGetInteger64v(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i64);
2945		if (static_cast<GLuint>(i64) != expected)
2946		{
2947			return false;
2948		}
2949		glGetFloatv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &f);
2950		if (static_cast<GLuint>(f) != expected)
2951		{
2952			return false;
2953		}
2954		glGetDoublev(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &d);
2955		if (static_cast<GLuint>(d) != expected)
2956		{
2957			return false;
2958		}
2959		glGetBooleanv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &b);
2960		if (b != (expected != 0 ? GL_TRUE : GL_FALSE))
2961		{
2962			return false;
2963		}
2964
2965		return true;
2966	}
2967
2968	virtual long Setup()
2969	{
2970		m_program		 = 0;
2971		m_storage_buffer = 0;
2972		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
2973		return NO_ERROR;
2974	}
2975
2976	virtual long Run()
2977	{
2978		const char* const glsl_cs =
2979			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2980			   "uniform uvec3 g_global_size;" NL "void main() {" NL
2981			   "  const uint global_index = gl_GlobalInvocationID.x +" NL
2982			   "                            gl_GlobalInvocationID.y * g_global_size.x +" NL
2983			   "                            gl_GlobalInvocationID.z * g_global_size.x * g_global_size.y;" NL
2984			   "  if (gl_NumWorkGroups != g_global_size) {" NL "    g_output[global_index] = 0xffff;" NL
2985			   "    return;" NL "  }" NL "  g_output[global_index] = global_index;" NL "}";
2986		m_program = CreateComputeProgram(glsl_cs);
2987		glLinkProgram(m_program);
2988		if (!CheckProgram(m_program))
2989			return ERROR;
2990
2991		if (!CheckBinding(0))
2992			return ERROR;
2993
2994		glGenBuffers(2, m_dispatch_buffer);
2995
2996		const GLuint data[]  = { 1, 2, 3, 4, 5, 6, 7, 8 };
2997		const GLuint data2[] = { 3, 1, 4, 4 };
2998
2999		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
3000		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STREAM_DRAW);
3001		if (!CheckBinding(m_dispatch_buffer[0]))
3002			return ERROR;
3003
3004		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
3005		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data2), data2, GL_STREAM_READ);
3006		if (!CheckBinding(m_dispatch_buffer[1]))
3007			return ERROR;
3008
3009		glUseProgram(m_program);
3010		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
3011
3012		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
3013		if (!RunIteration(0, 6))
3014			return ERROR;
3015
3016		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 2, 3, 4);
3017		if (!RunIteration(4, 24))
3018			return ERROR;
3019
3020		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 5, 6);
3021		if (!RunIteration(12, 120))
3022			return ERROR;
3023
3024		glBufferSubData(GL_DISPATCH_INDIRECT_BUFFER, 20, 12, data);
3025		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
3026		if (!RunIteration(20, 6))
3027			return ERROR;
3028
3029		GLuint* ptr = static_cast<GLuint*>(glMapBuffer(GL_DISPATCH_INDIRECT_BUFFER, GL_WRITE_ONLY));
3030		*ptr++		= 4;
3031		*ptr++		= 4;
3032		*ptr++		= 4;
3033		glUnmapBuffer(GL_DISPATCH_INDIRECT_BUFFER);
3034
3035		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 4, 4);
3036		if (!RunIteration(0, 64))
3037			return ERROR;
3038
3039		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
3040
3041		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 4, 4);
3042		if (!RunIteration(4, 16))
3043			return ERROR;
3044
3045		glDeleteBuffers(2, m_dispatch_buffer);
3046		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
3047
3048		if (!CheckBinding(0))
3049			return ERROR;
3050
3051		return NO_ERROR;
3052	}
3053	virtual long Cleanup()
3054	{
3055		glUseProgram(0);
3056		glDeleteProgram(m_program);
3057		glDeleteBuffers(1, &m_storage_buffer);
3058		glDeleteBuffers(2, m_dispatch_buffer);
3059		return NO_ERROR;
3060	}
3061};
3062
3063class BasicSSOComputePipeline : public ComputeShaderBase
3064{
3065	virtual std::string Title()
3066	{
3067		return NL "Separable CS Programs - Compute and non-compute stages (1)";
3068	}
3069	virtual std::string Purpose()
3070	{
3071		return NL "1. Verify that compute and non-compute stages can be attached to one pipeline object." NL
3072				  "2. Verify that DrawArrays and ComputeDispatch commands works as expected in this case.";
3073	}
3074	virtual std::string Method()
3075	{
3076		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
3077				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
3078				  "4. Issue MemoryBarrier command." NL
3079				  "5. Issue DrawArrays command which uses data written by the compute stage." NL "6. Verify result.";
3080	}
3081	virtual std::string PassCriteria()
3082	{
3083		return NL "Everything works as expected.";
3084	}
3085
3086	GLuint m_vsp, m_fsp, m_csp;
3087	GLuint m_storage_buffer;
3088	GLuint m_vertex_array;
3089	GLuint m_pipeline;
3090
3091	virtual long Setup()
3092	{
3093		m_vsp = m_fsp = m_csp = 0;
3094		m_storage_buffer	  = 0;
3095		m_vertex_array		  = 0;
3096		m_pipeline			  = 0;
3097		return NO_ERROR;
3098	}
3099	virtual long Run()
3100	{
3101		const char* const glsl_cs =
3102			NL "layout(local_size_x = 4) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[4];" NL "};" NL
3103			   "void main() {" NL "  const vec2 quad[4] = { vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1) };" NL
3104			   "  g_output[gl_GlobalInvocationID.x] = vec4(quad[gl_GlobalInvocationID.x], 0, 1);" NL "}";
3105
3106		m_csp = CreateComputeProgram(glsl_cs);
3107		glProgramParameteri(m_csp, GL_PROGRAM_SEPARABLE, GL_TRUE);
3108		glLinkProgram(m_csp);
3109		if (!CheckProgram(m_csp))
3110			return ERROR;
3111
3112		const char* const glsl_vs =
3113			NL "layout(location = 0) in vec4 i_position;" NL "out gl_PerVertex {" NL "  vec4 gl_Position;" NL "};" NL
3114			   "void main() {" NL "  gl_Position = i_position;" NL "}";
3115		m_vsp = BuildShaderProgram(GL_VERTEX_SHADER, glsl_vs);
3116		if (!CheckProgram(m_vsp))
3117			return ERROR;
3118
3119		const char* const glsl_fs =
3120			NL "layout(location = 0) out vec4 o_color;" NL "void main() {" NL "  o_color = vec4(0, 1, 0, 1);" NL "}";
3121		m_fsp = BuildShaderProgram(GL_FRAGMENT_SHADER, glsl_fs);
3122		if (!CheckProgram(m_fsp))
3123			return ERROR;
3124
3125		glGenProgramPipelines(1, &m_pipeline);
3126		glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, m_vsp);
3127		glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, m_fsp);
3128		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_csp);
3129
3130		glGenBuffers(1, &m_storage_buffer);
3131		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
3132		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * 4, NULL, GL_DYNAMIC_DRAW);
3133		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3134
3135		glGenVertexArrays(1, &m_vertex_array);
3136		glBindVertexArray(m_vertex_array);
3137		glBindBuffer(GL_ARRAY_BUFFER, m_storage_buffer);
3138		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0);
3139		glBindBuffer(GL_ARRAY_BUFFER, 0);
3140		glEnableVertexAttribArray(0);
3141		glBindVertexArray(0);
3142
3143		glBindProgramPipeline(m_pipeline);
3144		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3145		glDispatchCompute(1, 1, 1);
3146
3147		glClear(GL_COLOR_BUFFER_BIT);
3148		glBindVertexArray(m_vertex_array);
3149		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
3150		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
3151
3152		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
3153			return ERROR;
3154		return NO_ERROR;
3155	}
3156
3157	virtual long Cleanup()
3158	{
3159		glDeleteProgram(m_vsp);
3160		glDeleteProgram(m_fsp);
3161		glDeleteProgram(m_csp);
3162		glDeleteBuffers(1, &m_storage_buffer);
3163		glDeleteVertexArrays(1, &m_vertex_array);
3164		glDeleteProgramPipelines(1, &m_pipeline);
3165		return NO_ERROR;
3166	}
3167};
3168
3169class BasicSSOCase2 : public ComputeShaderBase
3170{
3171	virtual std::string Title()
3172	{
3173		return NL "Separable CS Programs - Compute and non-compute stages (2)";
3174	}
3175	virtual std::string Purpose()
3176	{
3177		return NL "1. Verify that data computed by the compute stage is visible to non-compute stage after "
3178				  "MemoryBarrier command." NL "2. Verify that ProgramParameteri(program, GL_PROGRAM_SEPARABLE, "
3179				  "GL_TRUE) command works correctly for CS." NL
3180				  "3. Verify that gl_WorkGroupSize built-in variable is a contant and can be used as an array size.";
3181	}
3182	virtual std::string Method()
3183	{
3184		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
3185				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
3186				  "4. Issue MemoryBarrier command." NL
3187				  "5. Issue DrawArrays command which uses data written to the buffer object by the compute stage." NL
3188				  "6. Verify result.";
3189	}
3190	virtual std::string PassCriteria()
3191	{
3192		return NL "Everything works as expected.";
3193	}
3194
3195	GLuint m_program_ab;
3196	GLuint m_program_c;
3197	GLuint m_pipeline;
3198	GLuint m_storage_buffer;
3199	GLuint m_vao;
3200
3201	virtual long Setup()
3202	{
3203		m_program_ab	 = 0;
3204		m_program_c		 = 0;
3205		m_pipeline		 = 0;
3206		m_storage_buffer = 0;
3207		m_vao			 = 0;
3208		return NO_ERROR;
3209	}
3210	virtual long Run()
3211	{
3212		GLint res;
3213		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
3214		if (res <= 0)
3215		{
3216			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
3217			return NO_ERROR;
3218		}
3219
3220		const char* const glsl_a =
3221			"#version 430 core" NL "layout(binding = 1, std430) buffer Input {" NL "  vec2 g_input[4];" NL "};" NL
3222			"out StageData {" NL "  vec3 color;" NL "} g_vs_out;" NL "out gl_PerVertex {" NL "  vec4 gl_Position;" NL
3223			"};" NL "void main() {" NL "  gl_Position = vec4(g_input[gl_VertexID], 0, 1);" NL
3224			"  g_vs_out.color = vec3(0, 1, 0);" NL "}";
3225
3226		const char* const glsl_b =
3227			"#version 430 core" NL "in StageData {" NL "  vec3 color;" NL "} g_fs_in;" NL
3228			"layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(g_fs_in.color, 1);" NL "}";
3229
3230		const char* const glsl_c =
3231			"#version 430 core" NL "layout(local_size_x = 4) in;" NL "layout(binding = 1, std430) buffer Output {" NL
3232			"  vec2 g_output[gl_WorkGroupSize.x];" NL "};" NL "void main() {" NL
3233			"  if (gl_GlobalInvocationID.x == 0) {" NL "    g_output[0] = vec2(-0.8, -0.8);" NL
3234			"  } else if (gl_GlobalInvocationID.x == 1) {" NL "    g_output[1] = vec2(0.8, -0.8);" NL
3235			"  } else if (gl_GlobalInvocationID.x == 2) {" NL "    g_output[2] = vec2(-0.8, 0.8);" NL
3236			"  } else if (gl_GlobalInvocationID.x == 3) {" NL "    g_output[3] = vec2(0.8, 0.8);" NL "  }" NL "}";
3237
3238		m_program_ab = glCreateProgram();
3239		GLuint sh	= glCreateShader(GL_VERTEX_SHADER);
3240		glAttachShader(m_program_ab, sh);
3241		glDeleteShader(sh);
3242		glShaderSource(sh, 1, &glsl_a, NULL);
3243		glCompileShader(sh);
3244
3245		sh = glCreateShader(GL_FRAGMENT_SHADER);
3246		glAttachShader(m_program_ab, sh);
3247		glDeleteShader(sh);
3248		glShaderSource(sh, 1, &glsl_b, NULL);
3249		glCompileShader(sh);
3250
3251		glProgramParameteri(m_program_ab, GL_PROGRAM_SEPARABLE, GL_TRUE);
3252		glLinkProgram(m_program_ab);
3253
3254		m_program_c = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_c);
3255		glGenVertexArrays(1, &m_vao);
3256		glGenProgramPipelines(1, &m_pipeline);
3257		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_ab);
3258		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_c);
3259
3260		glGenBuffers(1, &m_storage_buffer);
3261		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer);
3262		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec2) * 4, NULL, GL_STREAM_DRAW);
3263
3264		glClear(GL_COLOR_BUFFER_BIT);
3265		glBindProgramPipeline(m_pipeline);
3266		glDispatchCompute(1, 1, 1);
3267		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
3268		glBindVertexArray(m_vao);
3269		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
3270
3271		if (getWindowWidth() < 500 &&
3272			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
3273		{
3274			return ERROR;
3275		}
3276		return NO_ERROR;
3277	}
3278	virtual long Cleanup()
3279	{
3280		glDeleteProgram(m_program_ab);
3281		glDeleteProgram(m_program_c);
3282		glDeleteProgramPipelines(1, &m_pipeline);
3283		glDeleteBuffers(1, &m_storage_buffer);
3284		glDeleteVertexArrays(1, &m_vao);
3285		return NO_ERROR;
3286	}
3287};
3288
3289class BasicSSOCase3 : public ComputeShaderBase
3290{
3291	virtual std::string Title()
3292	{
3293		return NL "Separable CS Programs - Compute stage";
3294	}
3295	virtual std::string Purpose()
3296	{
3297		return NL "Verify that compute shader stage selected with UseProgram command has precedence" NL
3298				  "over compute shader stage selected with BindProgramPipeline command.";
3299	}
3300	virtual std::string Method()
3301	{
3302		return NL "1. Create CS0 with CreateProgram command. Create CS1 with CreateShaderProgramv command." NL
3303				  "2. Verify that CS program selected with UseProgram is dispatched even if there is active" NL
3304				  "    compute stage bound by BindProgramPipeline.";
3305	}
3306	virtual std::string PassCriteria()
3307	{
3308		return NL "Everything works as expected.";
3309	}
3310
3311	GLuint m_program_a;
3312	GLuint m_program_b;
3313	GLuint m_pipeline;
3314	GLuint m_storage_buffer;
3315
3316	virtual long Setup()
3317	{
3318		m_program_a		 = 0;
3319		m_program_b		 = 0;
3320		m_pipeline		 = 0;
3321		m_storage_buffer = 0;
3322		return NO_ERROR;
3323	}
	/* Dispatches compute work under different combinations of a current program (glUseProgram)
	 * and a bound program pipeline, and checks which compute shader actually ran.
	 *
	 * Program A stores 1 and program B stores 2 into the int at shader storage binding 3, so the
	 * value read back after each dispatch identifies the program that executed.
	 *
	 * Returns NO_ERROR when all four dispatches produce the expected value, ERROR otherwise.
	 */
	virtual long Run()
	{
		/* compute shader A: writes 1 to g_output */
		const char* const glsl_a =
			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 1;" NL "}";

		/* compute shader B: identical to A except that it writes 2 */
		const char* const glsl_b =
			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 2;" NL "}";

		/* create program A by hand: attach a compute shader, compile it, mark the program separable, link */
		{
			m_program_a = glCreateProgram();
			GLuint sh   = glCreateShader(GL_COMPUTE_SHADER);
			glAttachShader(m_program_a, sh);
			/* the shader is only flagged for deletion here; GL keeps it alive while it is attached,
			 * which is why it can still be sourced and compiled below */
			glDeleteShader(sh);
			glShaderSource(sh, 1, &glsl_a, NULL);
			glCompileShader(sh);
			glProgramParameteri(m_program_a, GL_PROGRAM_SEPARABLE, GL_TRUE);
			glLinkProgram(m_program_a);
		}
		/* create program B in a single call */
		m_program_b = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_b);

		/* create storage buffer: a single int initialised to 0, bound to binding point 3 */
		{
			int data = 0;
			glGenBuffers(1, &m_storage_buffer);
			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer);
			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &data, GL_STREAM_READ);
		}

		/* pipeline initially uses program B for all of its stages */
		glGenProgramPipelines(1, &m_pipeline);
		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_b);

		/* case 1: program A is current AND the pipeline (holding B) is bound */
		glUseProgram(m_program_a);
		glBindProgramPipeline(m_pipeline);
		glDispatchCompute(1, 1, 1);
		/* make the shader write visible to the glGetBufferSubData readback */
		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

		/* validate: A's value is expected, i.e. the current program wins over the bound pipeline */
		{
			int data;
			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
			if (data != 1)
			{
				m_context.getTestContext().getLog()
					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
				return ERROR;
			}
		}

		/* case 2: no current program, so the still-bound pipeline supplies the compute stage */
		glUseProgram(0);
		glDispatchCompute(1, 1, 1);
		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

		/* validate: B's value is expected */
		{
			int data;
			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
			if (data != 2)
			{
				m_context.getTestContext().getLog()
					<< tcu::TestLog::Message << "Data is " << data << " should be 2." << tcu::TestLog::EndMessage;
				return ERROR;
			}
		}

		/* case 3: program B is made current directly (pipeline is still bound) */
		glUseProgram(m_program_b);
		glDispatchCompute(1, 1, 1);
		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

		/* validate: B's value is expected */
		{
			int data;
			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
			if (data != 2)
			{
				m_context.getTestContext().getLog()
					<< tcu::TestLog::Message << "Data is " << data << " should be 2." << tcu::TestLog::EndMessage;
				return ERROR;
			}
		}

		/* case 4: no current program, and the pipeline's compute stage is replaced with program A */
		glUseProgram(0);
		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_a);
		glDispatchCompute(1, 1, 1);
		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

		/* validate: A's value is expected */
		{
			int data;
			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), &data);
			if (data != 1)
			{
				m_context.getTestContext().getLog()
					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
				return ERROR;
			}
		}

		return NO_ERROR;
	}
3426	virtual long Cleanup()
3427	{
3428		glDeleteProgram(m_program_a);
3429		glDeleteProgram(m_program_b);
3430		glDeleteProgramPipelines(1, &m_pipeline);
3431		glDeleteBuffers(1, &m_storage_buffer);
3432		return NO_ERROR;
3433	}
3434};
3435
3436class BasicAtomicCase1 : public ComputeShaderBase
3437{
3438	virtual std::string Title()
3439	{
3440		return NL "Atomic functions";
3441	}
3442	virtual std::string Purpose()
3443	{
3444		return NL "1. Verify that atomicAdd function works as expected with int and uint parameters." NL
3445				  "2. Verify that shared memory can be used with atomic functions." NL
3446				  "3. Verify that groupMemoryBarrier() and barrier() built-in functions work as expected.";
3447	}
3448	virtual std::string Method()
3449	{
3450		return NL "1. Use shared memory as a 'counter' with-in one CS work group." NL
3451				  "2. Each shader invocation increments/decrements 'counter' value using atomicAdd function." NL
3452				  "3. Values returned by atomicAdd function are written to SSBO." NL
3453				  "4. Verify SSBO content (values from 0 to 7 should be written).";
3454	}
3455	virtual std::string PassCriteria()
3456	{
3457		return NL "Everything works as expected.";
3458	}
3459
3460	GLuint m_program;
3461	GLuint m_storage_buffer;
3462
3463	virtual long Setup()
3464	{
3465		m_program		 = 0;
3466		m_storage_buffer = 0;
3467		return NO_ERROR;
3468	}
3469	virtual long Run()
3470	{
3471		const char* const glsl_cs =
3472			NL "layout(local_size_x = 8) in;" NL "layout(std430, binding = 0) buffer Output {" NL
3473			   "  uint g_add_output[8];" NL "  int g_sub_output[8];" NL "};" NL "shared uint g_add_value;" NL
3474			   "shared int g_sub_value;" NL "void main() {" NL "  if (gl_LocalInvocationIndex == 0) {" NL
3475			   "    g_add_value = 0u;" NL "    g_sub_value = 7;" NL "  }" NL
3476			   "  g_add_output[gl_LocalInvocationIndex] = 0u;" NL "  g_sub_output[gl_LocalInvocationIndex] = 0;" NL
3477			   "  groupMemoryBarrier();" NL "  barrier();" NL
3478			   "  g_add_output[gl_LocalInvocationIndex] = atomicAdd(g_add_value, 1u);" NL
3479			   "  g_sub_output[gl_LocalInvocationIndex] = atomicAdd(g_sub_value, -1);" NL "}";
3480		m_program = CreateComputeProgram(glsl_cs);
3481		glLinkProgram(m_program);
3482		if (!CheckProgram(m_program))
3483			return ERROR;
3484
3485		glGenBuffers(1, &m_storage_buffer);
3486		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3487		glBufferData(GL_SHADER_STORAGE_BUFFER, 16 * sizeof(int), NULL, GL_STATIC_DRAW);
3488
3489		glUseProgram(m_program);
3490		glDispatchCompute(1, 1, 1);
3491		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3492
3493		std::vector<int> data(8);
3494		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 8, &data[0]);
3495		std::sort(data.begin(), data.end());
3496		for (int i = 0; i < 8; ++i)
3497		{
3498			if (data[i] != i)
3499			{
3500				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3501													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
3502				return ERROR;
3503			}
3504		}
3505
3506		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 8, sizeof(int) * 8, &data[0]);
3507		std::sort(data.begin(), data.end());
3508		for (int i = 0; i < 8; ++i)
3509		{
3510			if (data[i] != i)
3511			{
3512				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3513													<< data[i] << " should be " << i << "." << tcu::TestLog::EndMessage;
3514				return ERROR;
3515			}
3516		}
3517
3518		return NO_ERROR;
3519	}
3520	virtual long Cleanup()
3521	{
3522		glUseProgram(0);
3523		glDeleteProgram(m_program);
3524		glDeleteBuffers(1, &m_storage_buffer);
3525		return NO_ERROR;
3526	}
3527};
3528
3529class BasicAtomicCase2 : public ComputeShaderBase
3530{
3531	virtual std::string Title()
3532	{
3533		return NL "Atomic functions - buffer variables";
3534	}
3535	virtual std::string Purpose()
3536	{
3537		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3538				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with buffer variables." NL
3539				  "2. Verify that atomic functions work with parameters being constants and" NL
3540				  "    with parameters being uniforms." NL
3541				  "3. Verify that barrier() built-in function can be used in a control flow.";
3542	}
3543	virtual std::string Method()
3544	{
3545		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3546				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3547				  "3. Verify SSBO content." NL
3548				  "4. Repeat for different number of work groups and different work group sizes.";
3549	}
3550	virtual std::string PassCriteria()
3551	{
3552		return NL "Everything works as expected.";
3553	}
3554
3555	GLuint m_program;
3556	GLuint m_storage_buffer[2];
3557	GLuint m_dispatch_buffer;
3558
3559	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
3560	{
3561		const uvec3		  global_size = local_size * num_groups;
3562		std::stringstream ss;
3563		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3564		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
3565		   << ", " << global_size.y() << ", " << global_size.z()
3566		   << ");" NL "layout(std430, binding = 0) buffer OutputU {" NL "  uint g_uint_out["
3567		   << global_size.x() * global_size.y() * global_size.z()
3568		   << "];" NL "};" NL "layout(std430, binding = 1) buffer OutputI {" NL "  int data["
3569		   << global_size.x() * global_size.y() * global_size.z()
3570		   << "];" NL "} g_int_out;" NL
3571			  "uniform uint g_uint_value[8] = uint[8](3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u);" NL "void main() {" NL
3572			  "  const uint global_index = gl_GlobalInvocationID.x +" NL
3573			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
3574			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
3575			  "  atomicExchange(g_uint_out[global_index], g_uint_value[0]);" NL
3576			  "  atomicMin(g_uint_out[global_index], g_uint_value[1]);" NL
3577			  "  atomicMax(g_uint_out[global_index], g_uint_value[2]);" NL
3578			  "  atomicAnd(g_uint_out[global_index], g_uint_value[3]);" NL
3579			  "  atomicOr(g_uint_out[global_index], g_uint_value[4]);" NL "  if (g_uint_value[0] > 0u) {" NL
3580			  "    barrier();" // not needed here, just check if compiler accepts it in a control flow
3581			NL "    atomicXor(g_uint_out[global_index], g_uint_value[5]);" NL "  }" NL
3582			  "  atomicCompSwap(g_uint_out[global_index], g_uint_value[6], g_uint_value[7]);" NL NL
3583			  "  atomicExchange(g_int_out.data[global_index], 3);" NL "  atomicMin(g_int_out.data[global_index], 1);" NL
3584			  "  atomicMax(g_int_out.data[global_index], 2);" NL "  atomicAnd(g_int_out.data[global_index], 0x1);" NL
3585			  "  atomicOr(g_int_out.data[global_index], 0x3);" NL "  atomicXor(g_int_out.data[global_index], 0x1);" NL
3586			  "  atomicCompSwap(g_int_out.data[global_index], 0x2, 0x7);" NL "}";
3587		return ss.str();
3588	}
3589	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
3590	{
3591		if (m_program != 0)
3592			glDeleteProgram(m_program);
3593		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
3594		glLinkProgram(m_program);
3595		if (!CheckProgram(m_program))
3596			return false;
3597
3598		const GLuint kBufferSize =
3599			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
3600
3601		if (m_storage_buffer[0] == 0)
3602			glGenBuffers(2, m_storage_buffer);
3603		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
3604		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3605		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
3606		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3607
3608		glUseProgram(m_program);
3609		if (dispatch_indirect)
3610		{
3611			if (m_dispatch_buffer == 0)
3612				glGenBuffers(1, &m_dispatch_buffer);
3613			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3614			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3615			glDispatchComputeIndirect(0);
3616		}
3617		else
3618		{
3619			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
3620		}
3621		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3622
3623		std::vector<GLuint> udata(kBufferSize);
3624		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
3625		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &udata[0]);
3626		for (GLuint i = 0; i < kBufferSize; ++i)
3627		{
3628			if (udata[i] != 7)
3629			{
3630				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3631													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3632				return false;
3633			}
3634		}
3635
3636		std::vector<GLint> idata(kBufferSize);
3637		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
3638		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * kBufferSize, &idata[0]);
3639		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3640		{
3641			if (idata[i] != 7)
3642			{
3643				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3644													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3645				return false;
3646			}
3647		}
3648
3649		return true;
3650	}
3651	virtual long Setup()
3652	{
3653		m_program			= 0;
3654		m_storage_buffer[0] = m_storage_buffer[1] = 0;
3655		m_dispatch_buffer						  = 0;
3656		return NO_ERROR;
3657	}
3658	virtual long Run()
3659	{
3660		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
3661			return ERROR;
3662		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
3663			return ERROR;
3664		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
3665			return ERROR;
3666		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
3667			return ERROR;
3668		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
3669			return ERROR;
3670		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
3671			return ERROR;
3672		return NO_ERROR;
3673	}
3674	virtual long Cleanup()
3675	{
3676		glUseProgram(0);
3677		glDeleteProgram(m_program);
3678		glDeleteBuffers(2, m_storage_buffer);
3679		glDeleteBuffers(1, &m_dispatch_buffer);
3680		return NO_ERROR;
3681	}
3682};
3683
3684class BasicAtomicCase3 : public ComputeShaderBase
3685{
3686	virtual std::string Title()
3687	{
3688		return NL "Atomic functions - shared variables";
3689	}
3690	virtual std::string Purpose()
3691	{
3692		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3693				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with shared variables." NL
3694				  "2. Verify that atomic functions work with parameters being constants and" NL
3695				  "    with parameters being uniforms." NL
3696				  "3. Verify that atomic functions can be used in a control flow.";
3697	}
3698	virtual std::string Method()
3699	{
3700		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3701				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3702				  "3. Verify SSBO content." NL
3703				  "4. Repeat for different number of work groups and different work group sizes.";
3704	}
3705	virtual std::string PassCriteria()
3706	{
3707		return NL "Everything works as expected.";
3708	}
3709
3710	GLuint m_program;
3711	GLuint m_storage_buffer;
3712	GLuint m_dispatch_buffer;
3713
3714	std::string GenSource(const uvec3& local_size)
3715	{
3716		std::stringstream ss;
3717		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3718		   << ", local_size_z = " << local_size.z()
3719		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint g_uint_out["
3720		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "  int g_int_out["
3721		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "};" NL "shared uint g_shared_uint["
3722		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "shared int g_shared_int["
3723		   << local_size.x() * local_size.y() * local_size.z()
3724		   << "];" NL "uniform uint g_uint_value[8] = uint[8](3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u);" NL
3725			  "void main() {" NL "  atomicExchange(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[0]);" NL
3726			  "  atomicMin(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[1]);" NL
3727			  "  atomicMax(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[2]);" NL
3728			  "  atomicAnd(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[3]);" NL
3729			  "  atomicOr(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[4]);" NL
3730			  "  atomicXor(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[5]);" NL
3731			  "  atomicCompSwap(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[6], g_uint_value[7]);" NL NL
3732			  "  atomicExchange(g_shared_int[gl_LocalInvocationIndex], 3);" NL
3733			  "  atomicMin(g_shared_int[gl_LocalInvocationIndex], 1);" NL
3734			  "  atomicMax(g_shared_int[gl_LocalInvocationIndex], 2);" NL
3735			  "  atomicAnd(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL "  if (g_uint_value[1] > 0u) {" NL
3736			  "    atomicOr(g_shared_int[gl_LocalInvocationIndex], 0x3);" NL
3737			  "    atomicXor(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL
3738			  "    atomicCompSwap(g_shared_int[gl_LocalInvocationIndex], 0x2, 0x7);" NL "  }" NL NL
3739			  "  g_uint_out[gl_LocalInvocationIndex] = g_shared_uint[gl_LocalInvocationIndex];" NL
3740			  "  g_int_out[gl_LocalInvocationIndex] = g_shared_int[gl_LocalInvocationIndex];" NL "}";
3741		return ss.str();
3742	}
3743	bool RunIteration(const uvec3& local_size, bool dispatch_indirect)
3744	{
3745		if (m_program != 0)
3746			glDeleteProgram(m_program);
3747		m_program = CreateComputeProgram(GenSource(local_size));
3748		glLinkProgram(m_program);
3749		if (!CheckProgram(m_program))
3750			return false;
3751
3752		const GLuint kBufferSize = local_size.x() * local_size.y() * local_size.z();
3753
3754		if (m_storage_buffer == 0)
3755			glGenBuffers(1, &m_storage_buffer);
3756		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3757		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize * 2, NULL, GL_DYNAMIC_DRAW);
3758
3759		glUseProgram(m_program);
3760		if (dispatch_indirect)
3761		{
3762			const GLuint num_groups[3] = { 1, 1, 1 };
3763			if (m_dispatch_buffer == 0)
3764				glGenBuffers(1, &m_dispatch_buffer);
3765			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3766			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3767			glDispatchComputeIndirect(0);
3768		}
3769		else
3770		{
3771			glDispatchCompute(1, 1, 1);
3772		}
3773		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3774
3775		std::vector<GLuint> udata(kBufferSize);
3776		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, &udata[0]);
3777		for (GLuint i = 0; i < kBufferSize; ++i)
3778		{
3779			if (udata[i] != 7)
3780			{
3781				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3782													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3783				return false;
3784			}
3785		}
3786
3787		std::vector<GLint> idata(kBufferSize);
3788		glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, sizeof(GLint) * kBufferSize,
3789						   &idata[0]);
3790		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3791		{
3792			if (idata[i] != 7)
3793			{
3794				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
3795													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3796				return false;
3797			}
3798		}
3799
3800		return true;
3801	}
3802	virtual long Setup()
3803	{
3804		m_program		  = 0;
3805		m_storage_buffer  = 0;
3806		m_dispatch_buffer = 0;
3807		return NO_ERROR;
3808	}
3809	virtual long Run()
3810	{
3811		if (!RunIteration(uvec3(64, 1, 1), false))
3812			return ERROR;
3813		if (!RunIteration(uvec3(1, 1, 64), true))
3814			return ERROR;
3815		if (!RunIteration(uvec3(1, 1, 4), false))
3816			return ERROR;
3817		if (!RunIteration(uvec3(3, 2, 1), true))
3818			return ERROR;
3819		if (!RunIteration(uvec3(2, 4, 2), false))
3820			return ERROR;
3821		if (!RunIteration(uvec3(2, 4, 7), true))
3822			return ERROR;
3823		return NO_ERROR;
3824	}
3825	virtual long Cleanup()
3826	{
3827		glUseProgram(0);
3828		glDeleteProgram(m_program);
3829		glDeleteBuffers(1, &m_storage_buffer);
3830		glDeleteBuffers(1, &m_dispatch_buffer);
3831		return NO_ERROR;
3832	}
3833};
3834
3835class AdvancedCopyImage : public ComputeShaderBase
3836{
3837	virtual std::string Title()
3838	{
3839		return NL "Copy Image";
3840	}
3841	virtual std::string Purpose()
3842	{
3843		return NL "Verify that copying two textures using CS works as expected.";
3844	}
3845	virtual std::string Method()
3846	{
3847		return NL "Use shader image load and store operations to copy two textures in the CS.";
3848	}
3849	virtual std::string PassCriteria()
3850	{
3851		return NL "Everything works as expected.";
3852	}
3853
3854	GLuint m_program;
3855	GLuint m_texture[2];
3856
3857	virtual long Setup()
3858	{
3859		m_program = 0;
3860		memset(m_texture, 0, sizeof(m_texture));
3861		return NO_ERROR;
3862	}
3863
3864	virtual long Run()
3865	{
3866		const char* const glsl_cs = NL "#define TILE_WIDTH 16" NL "#define TILE_HEIGHT 16" NL
3867									   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
3868									   "layout(binding = 0, rgba8) uniform image2D g_input_image;" NL
3869									   "layout(binding = 1, rgba8) uniform image2D g_output_image;" NL	NL
3870									   "layout(local_size_x=TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL
3871									   "void main() {" NL "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
3872									   "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
3873									   "  const ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL NL
3874									   "  vec4 pixel = imageLoad(g_input_image, pixel_xy);" NL
3875									   "  imageStore(g_output_image, pixel_xy, pixel);" NL "}";
3876		m_program = CreateComputeProgram(glsl_cs);
3877		glLinkProgram(m_program);
3878		if (!CheckProgram(m_program))
3879			return ERROR;
3880
3881		std::vector<GLubyte> in_image(64 * 64 * 4, 0x0f);
3882		std::vector<GLubyte> out_image(64 * 64 * 4, 0x00);
3883
3884		glGenTextures(2, m_texture);
3885		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
3886		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3887		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 64, 64, 0, GL_RGBA, GL_UNSIGNED_BYTE, &in_image[0]);
3888
3889		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
3890		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3891		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 64, 64, 0, GL_RGBA, GL_UNSIGNED_BYTE, &out_image[0]);
3892
3893		glUseProgram(m_program);
3894		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
3895		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
3896		glDispatchCompute(5, 4,
3897						  1); // 5 is on purpose, to ensure that out of bounds image load and stores have no effect
3898		glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
3899
3900		std::vector<GLubyte> data(64 * 64 * 4);
3901		glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
3902		for (std::size_t i = 0; i < data.size(); ++i)
3903		{
3904			if (getWindowWidth() > 100 && data[i] != 0x0f)
3905			{
3906				m_context.getTestContext().getLog()
3907					<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be " << 0x0f
3908					<< "." << tcu::TestLog::EndMessage;
3909				return ERROR;
3910			}
3911		}
3912
3913		return NO_ERROR;
3914	}
3915	virtual long Cleanup()
3916	{
3917		glUseProgram(0);
3918		glDeleteProgram(m_program);
3919		glDeleteTextures(2, m_texture);
3920		return NO_ERROR;
3921	}
3922};
3923
3924class AdvancedPipelinePreVS : public ComputeShaderBase
3925{
3926	virtual std::string Title()
3927	{
3928		return NL "CS as an additional pipeline stage - Before VS (1)";
3929	}
3930	virtual std::string Purpose()
3931	{
3932		return NL "Verify that CS which runs just before VS and modifies VBO content works as expected.";
3933	}
3934	virtual std::string Method()
3935	{
3936		return NL "1. Prepare VBO and VAO for a drawing operation." NL "2. Run CS to modify existing VBO content." NL
3937				  "3. Issue MemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT) command." NL
3938				  "4. Issue draw call command." NL "5. Verify that the framebuffer content is as expected.";
3939	}
3940	virtual std::string PassCriteria()
3941	{
3942		return NL "Everything works as expected.";
3943	}
3944
3945	GLuint m_program[2];
3946	GLuint m_vertex_buffer;
3947	GLuint m_vertex_array;
3948
3949	virtual long Setup()
3950	{
3951		memset(m_program, 0, sizeof(m_program));
3952		m_vertex_buffer = 0;
3953		m_vertex_array  = 0;
3954		return NO_ERROR;
3955	}
3956	virtual long Run()
3957	{
3958		const char* const glsl_cs =
3959			NL "layout(local_size_x = 4) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
3960			   "layout(binding = 0, std430) buffer VertexBuffer {" NL "  Vertex g_vertex[];" NL "};" NL
3961			   "uniform float g_scale = 0.8;" NL "void main() {" NL
3962			   "  g_vertex[gl_GlobalInvocationID.x].position.xyz *= g_scale;" NL
3963			   "  g_vertex[gl_GlobalInvocationID.x].color *= vec4(0, 1, 0, 1);" NL "}";
3964		m_program[0] = CreateComputeProgram(glsl_cs);
3965		glLinkProgram(m_program[0]);
3966		if (!CheckProgram(m_program[0]))
3967			return ERROR;
3968
3969		const char* const glsl_vs =
3970			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec4 g_color;" NL
3971			   "out StageData {" NL "  vec4 color;" NL "} g_vs_out;" NL "void main() {" NL
3972			   "  gl_Position = g_position;" NL "  g_vs_out.color = g_color;" NL "}";
3973
3974		const char* const glsl_fs =
3975			NL "in StageData {" NL "  vec4 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
3976			   "void main() {" NL "  g_color = g_fs_in.color;" NL "}";
3977		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
3978		glLinkProgram(m_program[1]);
3979		if (!CheckProgram(m_program[1]))
3980			return ERROR;
3981
3982		/* vertex buffer */
3983		{
3984			const float data[] = { -1, -1, 0, 1, 1, 1, 1, 1, 1, -1, 0, 1, 1, 1, 1, 1,
3985								   -1, 1,  0, 1, 1, 1, 1, 1, 1, 1,  0, 1, 1, 1, 1, 1 };
3986			glGenBuffers(1, &m_vertex_buffer);
3987			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3988			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
3989			glBindBuffer(GL_ARRAY_BUFFER, 0);
3990		}
3991
3992		glGenVertexArrays(1, &m_vertex_array);
3993		glBindVertexArray(m_vertex_array);
3994		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3995		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
3996		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
3997		glBindBuffer(GL_ARRAY_BUFFER, 0);
3998		glEnableVertexAttribArray(0);
3999		glEnableVertexAttribArray(1);
4000		glBindVertexArray(0);
4001
4002		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_vertex_buffer);
4003		glUseProgram(m_program[0]);
4004		glDispatchCompute(1, 1, 1);
4005
4006		glClear(GL_COLOR_BUFFER_BIT);
4007		glUseProgram(m_program[1]);
4008		glBindVertexArray(m_vertex_array);
4009		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
4010		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
4011
4012		if (getWindowWidth() < 500 &&
4013			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
4014		{
4015			return ERROR;
4016		}
4017		return NO_ERROR;
4018	}
4019	virtual long Cleanup()
4020	{
4021		glUseProgram(0);
4022		for (int i = 0; i < 2; ++i)
4023			glDeleteProgram(m_program[i]);
4024		glDeleteBuffers(1, &m_vertex_buffer);
4025		glDeleteVertexArrays(1, &m_vertex_array);
4026		return NO_ERROR;
4027	}
4028};
4029
4030class AdvancedPipelineGenDrawCommands : public ComputeShaderBase
4031{
4032	virtual std::string Title()
4033	{
4034		return NL "CS as an additional pipeline stage - Before VS (2)";
4035	}
4036	virtual std::string Purpose()
4037	{
4038		return NL "Verify that a complex scenario where CS is used to generate drawing commands" NL
4039				  "and write them to a draw indirect buffer works as expected. This is a practial usage of CS." NL
4040				  "CS is used for culling objects which are outside of the viewing frustum.";
4041	}
4042	virtual std::string Method()
4043	{
4044		return NL "1. Run CS which will generate four sets of draw call parameters and write them to the draw indirect "
4045				  "buffer." NL "2. One set of draw call parameters will be: 0, 0, 0, 0" NL
4046				  "    (which means that an object is outside of the viewing frustum and should not be drawn)." NL
4047				  "3. Issue MemoryBarrier(GL_COMMAND_BARRIER_BIT) command." NL
4048				  "4. Issue four draw indirect commands." NL "5. Verify that the framebuffer content is as expected.";
4049	}
4050	virtual std::string PassCriteria()
4051	{
4052		return NL "Everything works as expected.";
4053	}
4054
4055	GLuint m_program[2];
4056	GLuint m_vertex_buffer;
4057	GLuint m_index_buffer;
4058	GLuint m_vertex_array;
4059	GLuint m_draw_buffer;
4060	GLuint m_object_buffer;
4061
4062	virtual long Setup()
4063	{
4064		memset(m_program, 0, sizeof(m_program));
4065		m_vertex_buffer = 0;
4066		m_index_buffer  = 0;
4067		m_vertex_array  = 0;
4068		m_draw_buffer   = 0;
4069		m_object_buffer = 0;
4070		return NO_ERROR;
4071	}
4072	virtual long Run()
4073	{
4074		GLint res;
4075		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
4076		if (res <= 0)
4077		{
4078			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
4079			return NO_ERROR;
4080		}
4081
4082		const char* const glsl_cs =
4083			NL "layout(local_size_x = 4) in;" NL "struct DrawCommand {" NL "  uint count;" NL
4084			   "  uint instance_count;" NL "  uint first_index;" NL "  int base_vertex;" NL "  uint base_instance;" NL
4085			   "};" NL "layout(std430) buffer;" NL "layout(binding = 0) readonly buffer ObjectBuffer {" NL
4086			   "  mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
4087			   "layout(binding = 1) writeonly buffer DrawCommandBuffer {" NL "  DrawCommand g_command[4];" NL "};" NL
4088			   "bool IsObjectVisible(uint id) {" NL
4089			   "  if (g_objects.transform[id][3].x < -1.0 || g_objects.transform[id][3].x > 1.0) return false;" NL
4090			   "  if (g_objects.transform[id][3][1] < -1.0 || g_objects.transform[id][3][1] > 1.0) return false;" NL
4091			   "  if (g_objects.transform[id][3][2] < -1.0 || g_objects.transform[id][3].z > 1.0) return false;" NL
4092			   "  return true;" NL "}" NL "void main() {" NL "  uint id = gl_GlobalInvocationID.x;" NL
4093			   "  g_command[id].count = 0;" NL "  g_command[id].instance_count = 0;" NL
4094			   "  g_command[id].first_index = 0;" NL "  g_command[id].base_vertex = 0;" NL
4095			   "  g_command[id].base_instance = 0;" NL "  if (IsObjectVisible(id)) {" NL
4096			   "    g_command[id].count = g_objects.count[id];" NL "    g_command[id].instance_count = 1;" NL
4097			   "    g_command[id].first_index = g_objects.first_index[id];" NL "  }" NL "}";
4098		m_program[0] = CreateComputeProgram(glsl_cs);
4099		glLinkProgram(m_program[0]);
4100		if (!CheckProgram(m_program[0]))
4101			return ERROR;
4102
4103		const char* const glsl_vs =
4104			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec3 g_color;" NL
4105			   "out StageData {" NL "  vec3 color;" NL "} g_vs_out;" NL
4106			   "layout(binding = 0, std430) buffer ObjectBuffer {" NL "  mat4 transform[4];" NL "  uint count[4];" NL
4107			   "  uint first_index[4];" NL "} g_objects;" NL "uniform int g_object_id;" NL "void main() {" NL
4108			   "  gl_Position = g_objects.transform[g_object_id] * g_position;" NL "  g_vs_out.color = g_color;" NL "}";
4109
4110		const char* const glsl_fs =
4111			NL "in StageData {" NL "  vec3 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
4112			   "void main() {" NL "  g_color = vec4(g_fs_in.color, 1);" NL "}";
4113		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
4114		glLinkProgram(m_program[1]);
4115		if (!CheckProgram(m_program[1]))
4116			return ERROR;
4117		glViewport(0, 0, 100, 100);
4118
4119		/* object buffer */
4120		{
4121			struct
4122			{
4123				mat4   transform[4];
4124				GLuint count[4];
4125				GLuint first_index[4];
4126			} data = {
4127				{ tcu::translationMatrix(vec3(-1.5f, -0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, -0.5f, 0.0f)),
4128				  tcu::translationMatrix(vec3(-0.5f, 0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, 0.5f, 0.0f)) },
4129				{ 4, 4, 4, 4 },
4130				{ 0, 4, 8, 12 }
4131			};
4132			glGenBuffers(1, &m_object_buffer);
4133			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_object_buffer);
4134			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
4135		}
4136		/* vertex buffer */
4137		{
4138			const vec3 data[] = { vec3(-0.4f, -0.4f, 0.0f), vec3(1, 0, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 0, 0),
4139								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 0, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 0, 0),
4140								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(0, 1, 0),
4141								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 1, 0),
4142								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 0, 1), vec3(0.4f, -0.4f, 0.0f), vec3(0, 0, 1),
4143								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 0, 1), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 0, 1),
4144								  vec3(-0.4f, -0.4f, 0.0f), vec3(1, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 1, 0),
4145								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 1, 0) };
4146			glGenBuffers(1, &m_vertex_buffer);
4147			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4148			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
4149			glBindBuffer(GL_ARRAY_BUFFER, 0);
4150		}
4151		/* index buffer */
4152		{
4153			const GLushort data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4154			glGenBuffers(1, &m_index_buffer);
4155			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
4156			glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW);
4157			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
4158		}
4159		glGenBuffers(1, &m_draw_buffer);
4160		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
4161		glBufferData(GL_DRAW_INDIRECT_BUFFER, 4 * sizeof(GLuint) * 5, NULL, GL_DYNAMIC_DRAW);
4162		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
4163
4164		glGenVertexArrays(1, &m_vertex_array);
4165		glBindVertexArray(m_vertex_array);
4166		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4167		glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), 0);
4168		glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), reinterpret_cast<void*>(sizeof(vec3)));
4169		glBindBuffer(GL_ARRAY_BUFFER, 0);
4170		glEnableVertexAttribArray(0);
4171		glEnableVertexAttribArray(1);
4172		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
4173		glBindVertexArray(0);
4174
4175		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_draw_buffer);
4176		glUseProgram(m_program[0]);
4177		glDispatchCompute(1, 1, 1);
4178
4179		glClear(GL_COLOR_BUFFER_BIT);
4180		glUseProgram(m_program[1]);
4181		glBindVertexArray(m_vertex_array);
4182		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
4183		glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
4184		/* draw (CPU draw calls dispatch, could be done by the GPU with ARB_multi_draw_indirect) */
4185		{
4186			GLsizeiptr offset = 0;
4187			for (int i = 0; i < 4; ++i)
4188			{
4189				glUniform1i(glGetUniformLocation(m_program[1], "g_object_id"), i);
4190				glDrawElementsIndirect(GL_TRIANGLE_STRIP, GL_UNSIGNED_SHORT, reinterpret_cast<void*>(offset));
4191				offset += 5 * sizeof(GLuint);
4192			}
4193		}
4194		if (getWindowWidth() >= 100 && getWindowHeight() >= 100 &&
4195			!ValidateWindow4Quads(vec3(0), vec3(0, 1, 0), vec3(1, 1, 0), vec3(0, 0, 1)))
4196		{
4197			return ERROR;
4198		}
4199		return NO_ERROR;
4200	}
4201	virtual long Cleanup()
4202	{
4203		glUseProgram(0);
4204		for (int i = 0; i < 2; ++i)
4205			glDeleteProgram(m_program[i]);
4206		glDeleteBuffers(1, &m_vertex_buffer);
4207		glDeleteBuffers(1, &m_index_buffer);
4208		glDeleteVertexArrays(1, &m_vertex_array);
4209		glDeleteBuffers(1, &m_draw_buffer);
4210		glDeleteBuffers(1, &m_object_buffer);
4211		glViewport(0, 0, getWindowWidth(), getWindowHeight());
4212		return NO_ERROR;
4213	}
4214};
4215
4216class AdvancedPipelineComputeChain : public ComputeShaderBase
4217{
4218	virtual std::string Title()
4219	{
4220		return NL "Compute Chain";
4221	}
4222	virtual std::string Purpose()
4223	{
4224		return NL "1. Verify that dispatching several compute kernels that work in a sequence" NL
4225				  "    with a common set of resources works as expected." NL
4226				  "2. Verify that indexing nested structures with built-in variables work as expected." NL
4227				  "3. Verify that two kernels can write to the same resource without MemoryBarrier" NL
4228				  "    command if target regions of memory do not overlap.";
4229	}
4230	virtual std::string Method()
4231	{
4232		return NL "1. Create a set of GPU resources (buffers, images, atomic counters)." NL
4233				  "2. Dispatch Kernel0 that write to these resources." NL "3. Issue MemoryBarrier command." NL
4234				  "4. Dispatch Kernel1 that read/write from/to these resources." NL "5. Issue MemoryBarrier command." NL
4235				  "6. Dispatch Kernel2 that read/write from/to these resources." NL
4236				  "7. Verify that content of all resources is as expected.";
4237	}
4238	virtual std::string PassCriteria()
4239	{
4240		return NL "Everything works as expected.";
4241	}
4242
4243	GLuint m_program[3];
4244	GLuint m_storage_buffer[4];
4245	GLuint m_counter_buffer;
4246	GLuint m_texture;
4247	GLuint m_fbo;
4248
4249	std::string Common()
4250	{
4251		return NL "struct S0 {" NL "  int m0[8];" NL "};" NL "struct S1 {" NL "  S0 m0[8];" NL "};" NL
4252				  "layout(binding = 0, std430) buffer Buffer0 {" NL "  int m0[5];" NL "  S1 m1[8];" NL "} g_buffer0;" NL
4253				  "layout(binding = 1, std430) buffer Buffer1 {" NL "  uint data[8];" NL "} g_buffer1;" NL
4254				  "layout(binding = 2, std430) buffer Buffer2 {" NL "  int data[256];" NL "} g_buffer2;" NL
4255				  "layout(binding = 3, std430) buffer Buffer3 {" NL "  int data[256];" NL "} g_buffer3;" NL
4256				  "layout(binding = 4, std430) buffer Buffer4 {" NL "  mat4 data0;" NL "  mat4 data1;" NL
4257				  "} g_buffer4;" NL "layout(binding = 0, rgba32f) uniform image2D g_image0;" NL
4258				  "layout(binding = 1, offset = 8) uniform atomic_uint g_counter[2];";
4259	}
4260	std::string GenGLSL(int p)
4261	{
4262		std::stringstream ss;
4263		ss << Common();
4264		if (p == 0)
4265		{
4266			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL
4267					 "void UpdateBuffer0(uvec3 id, int add_val) {" NL "  if (id.x < 8 && id.y < 8 && id.z < 8) {" NL
4268					 "    g_buffer0.m1[id.z].m0[id.y].m0[id.x] += add_val;" NL "  }" NL "}" NL
4269					 "uniform int g_add_value = 1;" NL "uniform uint g_counter_y = 1;" NL
4270					 "uniform vec4 g_image_value = vec4(0.125, 0.25, 0.375, 0.5);" NL "void main() {" NL
4271					 "  uvec3 id = gl_GlobalInvocationID;" NL "  UpdateBuffer0(id, 1);" NL
4272					 "  UpdateBuffer0(id, g_add_value);" NL "  if (id == uvec3(1, g_counter_y, 1)) {" NL
4273					 "    uint idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL
4274					 "    idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL "  }" NL
4275					 "  if (id.x < 4 && id.y < 4 && id.z == 0) {" NL
4276					 "    vec4 v = imageLoad(g_image0, ivec2(id.xy));" NL
4277					 "    imageStore(g_image0, ivec2(id.xy), v + g_image_value);" NL "  }" NL
4278					 "  if (id.x < 2 && id.y == 0 && id.z == 0) {" NL "    g_buffer2.data[id.x] -= int(g_counter_y);" NL
4279					 "  }" NL "}";
4280		}
4281		else if (p == 1)
4282		{
4283			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in;"
4284				// translation matrix
4285				NL "uniform mat4 g_mvp = mat4(1.0, 0.0, 0.0, 0.0,  0.0, 1.0, 0.0, 0.0,  0.0, 0.0, 1.0, 0.0,  10.0, "
4286					 "20.0, 30.0, 1.0);" NL "void main() {" NL "  if (gl_GlobalInvocationID == uvec3(0)) {" NL
4287					 "    g_buffer4.data0 *= g_mvp;" NL "  }" NL "  if (gl_WorkGroupID == uvec3(0)) {" NL
4288					 "    g_buffer4.data1[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = "
4289					 "g_mvp[gl_LocalInvocationID.x][gl_LocalInvocationID.y];" NL "  }" NL "}";
4290		}
4291		else if (p == 2)
4292		{
4293			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL "void main() {" NL "}";
4294		}
4295		return ss.str();
4296	}
4297	virtual long Setup()
4298	{
4299		memset(m_program, 0, sizeof(m_program));
4300		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
4301		m_counter_buffer = 0;
4302		m_texture		 = 0;
4303		return NO_ERROR;
4304	}
4305	virtual long Run()
4306	{
4307		using namespace tcu;
4308
4309		for (int i = 0; i < 3; ++i)
4310		{
4311			m_program[i] = CreateComputeProgram(GenGLSL(i));
4312			glLinkProgram(m_program[i]);
4313			if (!CheckProgram(m_program[i]))
4314				return ERROR;
4315		}
4316
4317		glGenBuffers(4, m_storage_buffer);
4318		/* storage buffer 0 */
4319		{
4320			std::vector<int> data(5 + 8 * 8 * 8);
4321			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
4322			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(int)), &data[0], GL_STATIC_COPY);
4323		}
4324		/* storage buffer 1 */
4325		{
4326			const GLuint data[8] = { 0 };
4327			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
4328			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4329		}
4330		/* storage buffer 2 & 3 */
4331		{
4332			std::vector<GLint> data(512, 7);
4333			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4334			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(GLint)), &data[0], GL_STATIC_COPY);
4335
4336			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_storage_buffer[2], 0,
4337							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4338			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer[2],
4339							  (GLintptr)(sizeof(GLint) * data.size() / 2),
4340							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4341		}
4342		/* storage buffer 4 */
4343		{
4344			std::vector<mat4> data(2);
4345			data[0] = mat4(1);
4346			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_storage_buffer[3]);
4347			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(mat4)), &data[0], GL_STATIC_COPY);
4348		}
4349		/* counter buffer */
4350		{
4351			GLuint data[4] = { 0 };
4352			glGenBuffers(1, &m_counter_buffer);
4353			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 1, m_counter_buffer);
4354			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4355		}
4356		/* texture */
4357		{
4358			std::vector<vec4> data(4 * 4, vec4(0.0f));
4359			glGenTextures(1, &m_texture);
4360			glBindTexture(GL_TEXTURE_2D, m_texture);
4361			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4362			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4363			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 4, 4, 0, GL_RGBA, GL_FLOAT, &data[0]);
4364			glBindTexture(GL_TEXTURE_2D, 0);
4365		}
4366
4367		glUseProgram(m_program[0]);
4368		glBindImageTexture(0, m_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA32F);
4369		glDispatchCompute(2, 2, 2);
4370		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4371		glDispatchCompute(3, 2, 2);
4372
4373		glUseProgram(m_program[1]);
4374		glDispatchCompute(4, 3, 7);
4375
4376		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
4377						GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4378
4379		/* validate texture */
4380		{
4381			std::vector<vec4> data(4 * 4);
4382			glBindTexture(GL_TEXTURE_2D, m_texture);
4383			glGenFramebuffers(1, &m_fbo);
4384			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4385			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
4386			std::vector<GLubyte> colorData(4 * 4 * 4);
4387			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4388			for (int i = 0; i < 4 * 4 * 4; i += 4)
4389			{
4390				data[i / 4] =
4391					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4392						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4393			}
4394			for (std::size_t i = 0; i < data.size(); ++i)
4395			{
4396				if (!ColorEqual(data[i], vec4(0.25f, 0.5f, 0.75f, 1.0f), g_color_eps))
4397				{
4398					m_context.getTestContext().getLog()
4399						<< tcu::TestLog::Message << "Invalid data at texture." << tcu::TestLog::EndMessage;
4400					return ERROR;
4401				}
4402			}
4403		}
4404		/* validate storage buffer 0 */
4405		{
4406			std::vector<int> data(5 + 8 * 8 * 8);
4407			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
4408			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(data.size() * sizeof(int)), &data[0]);
4409			for (std::size_t i = 5; i < data.size(); ++i)
4410			{
4411				if (data[i] != 4)
4412				{
4413					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[i]
4414														<< " should be 2." << tcu::TestLog::EndMessage;
4415					return ERROR;
4416				}
4417			}
4418		}
4419		/* validate storage buffer 1 */
4420		{
4421			GLuint data[8];
4422			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
4423			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), data);
4424			for (GLuint i = 0; i < 4; ++i)
4425			{
4426				if (data[i] != i)
4427				{
4428					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[i]
4429														<< " should be " << i << "." << tcu::TestLog::EndMessage;
4430					return ERROR;
4431				}
4432			}
4433		}
4434		/* validate storage buffer 2 & 3 */
4435		{
4436			std::vector<GLint> data(512);
4437			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4438			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(sizeof(GLint) * data.size()), &data[0]);
4439			for (int i = 0; i < 2; ++i)
4440			{
4441				if (data[i] != 5)
4442				{
4443					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4444														<< " should be: 5." << tcu::TestLog::EndMessage;
4445					return ERROR;
4446				}
4447				if (data[i + 256] != 7)
4448				{
4449					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i + 256]
4450														<< " should be: 7." << tcu::TestLog::EndMessage;
4451					return ERROR;
4452				}
4453			}
4454		}
4455		/* validate storage buffer 4 */
4456		{
4457			mat4 data[2];
4458			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[3]);
4459			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data[0](0, 0));
4460			if (data[0] != translationMatrix(vec3(10.0f, 20.0f, 30.0f)))
4461			{
4462				m_context.getTestContext().getLog()
4463					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4464				return ERROR;
4465			}
4466			if (data[1] != transpose(translationMatrix(vec3(10.0f, 20.0f, 30.0f))))
4467			{
4468				m_context.getTestContext().getLog()
4469					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4470				return ERROR;
4471			}
4472		}
4473		/* validate counter buffer */
4474		{
4475			GLuint data[4] = { 0 };
4476			glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(data), data);
4477			if (data[3] != 4)
4478			{
4479				m_context.getTestContext().getLog()
4480					<< tcu::TestLog::Message << "Data is: " << data[3] << " should be: 4." << tcu::TestLog::EndMessage;
4481				return ERROR;
4482			}
4483		}
4484
4485		return NO_ERROR;
4486	}
4487	virtual long Cleanup()
4488	{
4489		glUseProgram(0);
4490		for (int i = 0; i < 3; ++i)
4491			glDeleteProgram(m_program[i]);
4492		glDeleteBuffers(4, m_storage_buffer);
4493		glDeleteBuffers(1, &m_counter_buffer);
4494		glDeleteTextures(1, &m_texture);
4495		glDeleteFramebuffers(1, &m_fbo);
4496		return NO_ERROR;
4497	}
4498};
4499
4500class AdvancedPipelinePostFS : public ComputeShaderBase
4501{
4502	virtual std::string Title()
4503	{
4504		return NL "CS as an additional pipeline stage - After FS";
4505	}
4506	virtual std::string Purpose()
4507	{
4508		return NL "1. Verify that CS which runs just after FS to do a post-processing on a rendered image works as "
4509				  "expected." NL "2. Verify that CS used as a post-processing filter works as expected." NL
4510				  "3. Verify that several CS kernels which run in a sequence to do a post-processing on a rendered "
4511				  "image works as expected.";
4512	}
4513	virtual std::string Method()
4514	{
4515		return NL
4516			"1. Render image to Texture0 using VS and FS." NL
4517			"2. Use Texture0 as an input to Kernel0 which performs post-processing and writes result to Texture1." NL
4518			"3. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4519			"4. Use Texture1 as an input to Kernel1 which performs post-processing and writes result to Texture0." NL
4520			"5. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4521			"6. Verify content of the final post-processed image (Texture0).";
4522	}
4523	virtual std::string PassCriteria()
4524	{
4525		return NL "Everything works as expected.";
4526	}
4527
4528	GLuint m_program[3];
4529	GLuint m_render_target[2];
4530	GLuint m_framebuffer;
4531	GLuint m_vertex_array;
4532
4533	virtual long Setup()
4534	{
4535		memset(m_program, 0, sizeof(m_program));
4536		memset(m_render_target, 0, sizeof(m_render_target));
4537		m_framebuffer  = 0;
4538		m_vertex_array = 0;
4539		return NO_ERROR;
4540	}
4541
4542	virtual long Run()
4543	{
4544		const char* const glsl_vs =
4545			NL "const vec2 g_vertex[4] = vec2[4](vec2(0), vec2(-1, -1), vec2(3, -1), vec2(-1, 3));" NL
4546			   "void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], 0, 1);" NL "}";
4547
4548		const char* const glsl_fs =
4549			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1, 0, 0, 1);" NL "}";
4550
4551		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4552		glLinkProgram(m_program[0]);
4553		if (!CheckProgram(m_program[0]))
4554			return ERROR;
4555
4556		const char* const glsl_cs =
4557			NL "#define TILE_WIDTH 16" NL "#define TILE_HEIGHT 16" NL
4558			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4559			   "layout(binding = 0, rgba32f) uniform image2D g_input_image;" NL
4560			   "layout(binding = 1, rgba32f) uniform image2D g_output_image;" NL	NL
4561			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL "void main() {" NL
4562			   "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
4563			   "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL NL "  if (thread_xy == ivec2(0)) {" NL
4564			   "    const ivec2 pixel_xy = tile_xy * kTileSize;" NL "    for (int y = 0; y < TILE_HEIGHT; ++y) {" NL
4565			   "      for (int x = 0; x < TILE_WIDTH; ++x) {" NL
4566			   "        imageStore(g_output_image, pixel_xy + ivec2(x, y), vec4(0, 1, 0, 1));" NL "      }" NL
4567			   "    }" NL "  }" NL "}";
4568
4569		m_program[1] = CreateComputeProgram(glsl_cs);
4570		glLinkProgram(m_program[1]);
4571		if (!CheckProgram(m_program[1]))
4572			return ERROR;
4573
4574		const char* const glsl_cs2 = NL "#define TILE_WIDTH 32" NL "#define TILE_HEIGHT 32" NL
4575										"const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4576										"layout(binding = 0, rgba32f) uniform image2D g_input_image;" NL
4577										"layout(binding = 1, rgba32f) uniform image2D g_output_image;" NL	NL
4578										"layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL
4579										"vec4 Process(vec4 ic) {" NL "  return ic + vec4(1, 0, 0, 0);" NL "}" NL
4580										"void main() {" NL "  const ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL
4581										"  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4582										"  const ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL
4583										"  vec4 ic = imageLoad(g_input_image, pixel_xy);" NL
4584										"  imageStore(g_output_image, pixel_xy, Process(ic));" NL "}";
4585		m_program[2] = CreateComputeProgram(glsl_cs2);
4586		glLinkProgram(m_program[2]);
4587		if (!CheckProgram(m_program[2]))
4588			return ERROR;
4589
4590		glGenVertexArrays(1, &m_vertex_array);
4591
4592		/* init render targets */
4593		{
4594			std::vector<vec4> data(128 * 128);
4595			glGenTextures(2, m_render_target);
4596			for (int i = 0; i < 2; ++i)
4597			{
4598				glBindTexture(GL_TEXTURE_2D, m_render_target[i]);
4599				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4600				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 128, 128, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
4601			}
4602			glBindTexture(GL_TEXTURE_2D, 0);
4603		}
4604
4605		glGenFramebuffers(1, &m_framebuffer);
4606		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4607		glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, m_render_target[0], 0);
4608		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4609
4610		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4611		glUseProgram(m_program[0]);
4612		glBindVertexArray(m_vertex_array);
4613		glClear(GL_COLOR_BUFFER_BIT);
4614		glViewport(0, 0, 128, 128);
4615		// draw full-viewport triangle
4616		glDrawArrays(GL_TRIANGLES, 1,
4617					 3); // note: <first> is 1 this means that gl_VertexID in the VS will be: 1, 2 and 3
4618		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4619
4620		glBindImageTexture(0, m_render_target[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);  // input
4621		glBindImageTexture(1, m_render_target[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // output
4622		glUseProgram(m_program[1]);
4623		glDispatchCompute(128 / 16, 128 / 16, 1);
4624
4625		glBindImageTexture(0, m_render_target[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F);  // input
4626		glBindImageTexture(1, m_render_target[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // output
4627		glUseProgram(m_program[2]);
4628		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4629		glDispatchCompute(128 / 32, 128 / 32, 1);
4630
4631		/* validate render target */
4632		{
4633			std::vector<vec4> data(128 * 128);
4634			glBindTexture(GL_TEXTURE_2D, m_render_target[0]);
4635			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4636			glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
4637			for (std::size_t i = 0; i < data.size(); ++i)
4638			{
4639				if (!IsEqual(data[i], vec4(1, 1, 0, 1)))
4640				{
4641					m_context.getTestContext().getLog()
4642						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
4643					return ERROR;
4644				}
4645			}
4646		}
4647		return NO_ERROR;
4648	}
4649
4650	virtual long Cleanup()
4651	{
4652		glViewport(0, 0, getWindowWidth(), getWindowHeight());
4653		glUseProgram(0);
4654		for (int i = 0; i < 3; ++i)
4655			glDeleteProgram(m_program[i]);
4656		glDeleteTextures(2, m_render_target);
4657		glDeleteVertexArrays(1, &m_vertex_array);
4658		glDeleteFramebuffers(1, &m_framebuffer);
4659		return NO_ERROR;
4660	}
4661};
4662
4663class AdvancedPipelinePostXFB : public ComputeShaderBase
4664{
4665	virtual std::string Title()
4666	{
4667		return NL "CS as an additional pipeline stage - After XFB";
4668	}
4669	virtual std::string Purpose()
4670	{
4671		return NL "1. Verify that CS which process data fedback by VS works as expected." NL
4672				  "2. Verify that XFB and SSBO works correctly together in one shader." NL
4673				  "3. Verify that 'switch' statment which selects different execution path for each CS thread works as "
4674				  "expected.";
4675	}
4676	virtual std::string Method()
4677	{
4678		return NL "1. Draw triangle with XFB enabled. Some data is written to the XFB buffer." NL
4679				  "2. Use XFB buffer as 'input SSBO' in CS. Process data and write it to 'output SSBO'." NL
4680				  "3. Verify 'output SSBO' content.";
4681	}
4682	virtual std::string PassCriteria()
4683	{
4684		return NL "Everything works as expected.";
4685	}
4686
4687	GLuint m_program[2];
4688	GLuint m_storage_buffer;
4689	GLuint m_xfb_buffer;
4690	GLuint m_vertex_buffer;
4691	GLuint m_vertex_array;
4692
4693	virtual long Setup()
4694	{
4695		memset(m_program, 0, sizeof(m_program));
4696		m_storage_buffer = 0;
4697		m_xfb_buffer	 = 0;
4698		m_vertex_buffer  = 0;
4699		m_vertex_array   = 0;
4700		return NO_ERROR;
4701	}
4702	virtual long Run()
4703	{
4704		GLint res;
4705		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
4706		if (res <= 0)
4707		{
4708			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
4709			return NO_ERROR;
4710		}
4711
4712		const char* const glsl_vs =
4713			NL "layout(location = 0) in vec4 g_position;" NL "layout(location = 1) in vec4 g_color;" NL
4714			   "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL "out StageData {" NL
4715			   "  vec4 color;" NL "} g_vs_out;" NL "layout(binding = 0, std430) buffer StageData {" NL
4716			   "  Vertex vertex[];" NL "} g_vs_buffer;" NL "void main() {" NL "  gl_Position = g_position;" NL
4717			   "  g_vs_out.color = g_color;" NL "  g_vs_buffer.vertex[gl_VertexID].position = g_position;" NL
4718			   "  g_vs_buffer.vertex[gl_VertexID].color = g_color;" NL "}";
4719
4720		const char* const glsl_fs =
4721			NL "in StageData {" NL "  vec4 color;" NL "} g_fs_in;" NL "layout(location = 0) out vec4 g_color;" NL
4722			   "void main() {" NL "  g_color = g_fs_in.color;" NL "}";
4723
4724		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4725		/* setup xfb varyings */
4726		{
4727			const char* const var[2] = { "gl_Position", "StageData.color" };
4728			glTransformFeedbackVaryings(m_program[0], 2, var, GL_INTERLEAVED_ATTRIBS);
4729		}
4730		glLinkProgram(m_program[0]);
4731		if (!CheckProgram(m_program[0]))
4732			return ERROR;
4733
4734		const char* const glsl_cs =
4735			NL "layout(local_size_x = 3) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
4736			   "layout(binding = 3, std430) buffer Buffer {" NL "  Vertex g_vertex[3];" NL "};" NL
4737			   "uniform vec4 g_color1 = vec4(0, 0, 1, 0);" NL "uniform int g_two = 2;" NL
4738			   "void UpdateVertex2(int i) {" NL "  g_vertex[i].color -= vec4(-1, 1, 0, 0);" NL "}" NL "void main() {" NL
4739			   "  switch (gl_GlobalInvocationID.x) {" NL
4740			   "    case 0: g_vertex[gl_GlobalInvocationID.x].color += vec4(1, 0, 0, 0); break;" NL
4741			   "    case 1: g_vertex[1].color += g_color1; break;" NL "    case 2: UpdateVertex2(g_two); break;" NL
4742			   "    default: return;" NL "  }" NL "}";
4743		m_program[1] = CreateComputeProgram(glsl_cs);
4744		glLinkProgram(m_program[1]);
4745		if (!CheckProgram(m_program[1]))
4746			return ERROR;
4747
4748		glGenBuffers(1, &m_storage_buffer);
4749		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
4750		glBufferData(GL_SHADER_STORAGE_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STATIC_COPY);
4751
4752		glGenBuffers(1, &m_xfb_buffer);
4753		glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, m_xfb_buffer);
4754		glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STREAM_COPY);
4755
4756		const float in_data[3 * 8] = { -1, -1, 0, 1, 0, 1, 0, 1, 3, -1, 0, 1, 0, 1, 0, 1, -1, 3, 0, 1, 0, 1, 0, 1 };
4757		glGenBuffers(1, &m_vertex_buffer);
4758		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4759		glBufferData(GL_ARRAY_BUFFER, sizeof(in_data), in_data, GL_STATIC_DRAW);
4760		glBindBuffer(GL_ARRAY_BUFFER, 0);
4761
4762		glGenVertexArrays(1, &m_vertex_array);
4763		glBindVertexArray(m_vertex_array);
4764		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4765		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
4766		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
4767		glBindBuffer(GL_ARRAY_BUFFER, 0);
4768		glEnableVertexAttribArray(0);
4769		glEnableVertexAttribArray(1);
4770		glBindVertexArray(0);
4771
4772		glClear(GL_COLOR_BUFFER_BIT);
4773		glUseProgram(m_program[0]);
4774		glBindVertexArray(m_vertex_array);
4775		glBeginTransformFeedback(GL_TRIANGLES);
4776		glDrawArrays(GL_TRIANGLES, 0, 3);
4777		glEndTransformFeedback();
4778
4779		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_xfb_buffer);
4780		glUseProgram(m_program[1]);
4781		glDispatchCompute(1, 1, 1);
4782
4783		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
4784
4785		/* validate storage buffer */
4786		{
4787			float data[3 * 8];
4788			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
4789			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), data);
4790			if (memcmp(data, in_data, sizeof(data)) != 0)
4791			{
4792				m_context.getTestContext().getLog()
4793					<< tcu::TestLog::Message << "Data in shader storage buffer is incorrect."
4794					<< tcu::TestLog::EndMessage;
4795				return ERROR;
4796			}
4797		}
4798		/* validate xfb buffer */
4799		{
4800			const float ref_data[3 * 8] = {
4801				-1, -1, 0, 1, 1, 1, 0, 1, 3, -1, 0, 1, 0, 1, 1, 1, -1, 3, 0, 1, 1, 0, 0, 1
4802			};
4803
4804			float data[3 * 8];
4805			glGetBufferSubData(GL_TRANSFORM_FEEDBACK_BUFFER, 0, sizeof(data), data);
4806			if (memcmp(data, ref_data, sizeof(data)) != 0)
4807			{
4808				m_context.getTestContext().getLog()
4809					<< tcu::TestLog::Message << "Data in xfb buffer is incorrect." << tcu::TestLog::EndMessage;
4810				return ERROR;
4811			}
4812		}
4813		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
4814		{
4815			return ERROR;
4816		}
4817		return NO_ERROR;
4818	}
4819
4820	virtual long Cleanup()
4821	{
4822		glUseProgram(0);
4823		for (int i = 0; i < 2; ++i)
4824			glDeleteProgram(m_program[i]);
4825		glDeleteBuffers(1, &m_vertex_buffer);
4826		glDeleteBuffers(1, &m_storage_buffer);
4827		glDeleteBuffers(1, &m_xfb_buffer);
4828		glDeleteVertexArrays(1, &m_vertex_array);
4829		return NO_ERROR;
4830	}
4831};
4832
4833class AdvancedSharedIndexing : public ComputeShaderBase
4834{
4835	virtual std::string Title()
4836	{
4837		return NL "Shared Memory - Indexing";
4838	}
4839	virtual std::string Purpose()
4840	{
4841		return NL "1. Verify that indexing various types of shared memory works as expected." NL
4842				  "2. Verify that indexing shared memory with different types of expressions work as expected." NL
4843				  "3. Verify that all declaration types of shared structures are supported by the GLSL compiler.";
4844	}
4845	virtual std::string Method()
4846	{
4847		return NL "1. Create CS which uses shared memory in many different ways." NL
4848				  "2. Write to shared memory using different expressions." NL "3. Validate shared memory content." NL
4849				  "4. Use synchronization primitives (barrier, groupMemoryBarrier) where applicable.";
4850	}
4851	virtual std::string PassCriteria()
4852	{
4853		return NL "Everyting works as expected.";
4854	}
4855
4856	GLuint m_program;
4857	GLuint m_texture;
4858
4859	virtual long Setup()
4860	{
4861		m_program = 0;
4862		m_texture = 0;
4863		return NO_ERROR;
4864	}
4865	virtual long Run()
4866	{
4867		const char* const glsl_cs = NL
4868			"layout(binding = 3, rgba32f) uniform image2D g_result_image;" NL
4869			"layout (local_size_x = 4,local_size_y=4 ) in;" NL "shared vec4 g_shared1[4];" NL
4870			"shared mat4 g_shared2;" NL "shared struct {" NL "  float data[4];" NL "} g_shared3[4];" NL
4871			"shared struct Type { float data[4]; } g_shared4[4];" NL "shared Type g_shared5[4];" NL
4872			"uniform bool g_true = true;" NL
4873			"uniform float g_values[16] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };" NL NL
4874			"void Sync() {" NL "  groupMemoryBarrier();" NL "  barrier();" NL "}" NL
4875			"void SetMemory(ivec2 xy, float value) {" NL "  g_shared1[xy.y][gl_LocalInvocationID.x] = value;" NL
4876			"  g_shared2[xy.y][xy.x] = value;" NL "  g_shared3[xy[1]].data[xy[0]] = value;" NL
4877			"  g_shared4[xy.y].data[xy[0]] = value;" NL
4878			"  g_shared5[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] = value;" NL "}" NL
4879			"bool CheckMemory(ivec2 xy, float expected) {" NL
4880			"  if (g_shared1[xy.y][xy[0]] != expected) return false;" NL
4881			"  if (g_shared2[xy[1]][xy[0]] != expected) return false;" NL
4882			"  if (g_shared3[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] != expected) return false;" NL
4883			"  if (g_shared4[gl_LocalInvocationID.y].data[xy.x] != expected) return false;" NL
4884			"  if (g_shared5[xy.y].data[xy.x] != expected) return false;" NL "  return true;" NL "}" NL
4885			"void main() {" NL "  const ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4886			"  vec4 result = vec4(0, 1, 0, 1);" NL NL
4887			"  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0);" NL "  Sync();" NL
4888			"  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0)) result = vec4(1, 0, 0, 1);" NL NL
4889			"  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0);" NL "  Sync();" NL
4890			"  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0)) result = vec4(1, 0, 0, 1);" NL NL
4891			"  if (g_true && gl_LocalInvocationID.x < 10) {" NL
4892			"    SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0);" NL "    Sync();" NL
4893			"    if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0)) result = vec4(1, 0, 0, 1);" NL
4894			"  }" NL NL "  imageStore(g_result_image, thread_xy, result);" NL "}";
4895		m_program = CreateComputeProgram(glsl_cs);
4896		glLinkProgram(m_program);
4897		if (!CheckProgram(m_program))
4898			return ERROR;
4899
4900		/* init texture */
4901		{
4902			std::vector<vec4> data(4 * 4);
4903			glGenTextures(1, &m_texture);
4904			glBindTexture(GL_TEXTURE_2D, m_texture);
4905			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4906			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, 4, 4, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
4907			glBindTexture(GL_TEXTURE_2D, 0);
4908		}
4909
4910		glBindImageTexture(3, m_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F);
4911		glUseProgram(m_program);
4912		glDispatchCompute(1, 1, 1);
4913
4914		/* validate render target */
4915		{
4916			std::vector<vec4> data(4 * 4);
4917			glBindTexture(GL_TEXTURE_2D, m_texture);
4918			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4919			glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_FLOAT, &data[0][0]);
4920			for (std::size_t i = 0; i < data.size(); ++i)
4921			{
4922				if (!IsEqual(data[i], vec4(0, 1, 0, 1)))
4923				{
4924					m_context.getTestContext().getLog()
4925						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
4926					return ERROR;
4927				}
4928			}
4929		}
4930		return NO_ERROR;
4931	}
	/* Releases the GL objects used by this subcase: makes no program current, then deletes
	 * the compute program and the result texture. Always returns NO_ERROR. */
	virtual long Cleanup()
	{
		glUseProgram(0);
		glDeleteProgram(m_program);
		glDeleteTextures(1, &m_texture);
		return NO_ERROR;
	}
4939};
4940
4941class AdvancedSharedMax : public ComputeShaderBase
4942{
4943	virtual std::string Title()
4944	{
4945		return NL "Shared Memory - 32K";
4946	}
4947	virtual std::string Purpose()
4948	{
4949		return NL "Support for 32K of shared memory is required by the OpenGL specifaction. Verify if an "
4950				  "implementation supports it.";
4951	}
4952	virtual std::string Method()
4953	{
4954		return NL "Create and dispatch CS which uses 32K of shared memory.";
4955	}
4956	virtual std::string PassCriteria()
4957	{
4958		return NL "Everything works as expected.";
4959	}
4960
4961	GLuint m_program;
4962	GLuint m_buffer;
4963
4964	virtual long Setup()
4965	{
4966		m_program = 0;
4967		m_buffer  = 0;
4968		return NO_ERROR;
4969	}
4970	virtual long Run()
4971	{
4972		const char* const glsl_cs =
4973			NL "layout(local_size_x = 1024) in;" NL
4974			   "shared struct Type { vec4 v[2]; } g_shared[1024];" // 32768 bytes of shared memory
4975			NL "layout(std430) buffer Output {" NL "  Type g_output[1024];" NL "};" NL NL "void main() {" NL
4976			   "  const int id = int(gl_GlobalInvocationID.x);" NL
4977			   "  g_shared[id].v = vec4[2](vec4(1.0), vec4(1.0));" NL "  memoryBarrierShared();" NL "  barrier();" NL NL
4978			   "  vec4 sum = vec4(0.0);" NL "  int sum_count = 0;" NL "  for (int i = id - 3; i < id + 4; ++i) {" NL
4979			   "    if (id >= 0 && id < g_shared.length()) {" NL "      sum += g_shared[id].v[0];" NL
4980			   "      sum += g_shared[id].v[1];" NL "      sum_count += 2;" NL "    }" NL "  }" NL
4981			   "  if (any(greaterThan(abs((sum / sum_count) - vec4(1.0)), vec4(0.0000001f)))) return;" NL NL
4982			   "  g_output[id] = g_shared[id];" NL "}";
4983		m_program = CreateComputeProgram(glsl_cs);
4984		glLinkProgram(m_program);
4985		if (!CheckProgram(m_program))
4986			return ERROR;
4987
4988		/* init buffer */
4989		{
4990			std::vector<vec4> data(1024 * 2);
4991			glGenBuffers(1, &m_buffer);
4992			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
4993			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
4994						 GL_DYNAMIC_COPY);
4995		}
4996
4997		glUseProgram(m_program);
4998		glDispatchCompute(1, 1, 1);
4999		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5000
5001		/* validate buffer */
5002		{
5003			std::vector<vec4> data(1024 * 2);
5004			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0]);
5005			for (std::size_t i = 0; i < data.size(); ++i)
5006			{
5007				if (!IsEqual(data[i], vec4(1.0f)))
5008				{
5009					m_context.getTestContext().getLog()
5010						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
5011					return ERROR;
5012				}
5013			}
5014		}
5015		return NO_ERROR;
5016	}
5017	virtual long Cleanup()
5018	{
5019		glUseProgram(0);
5020		glDeleteProgram(m_program);
5021		glDeleteBuffers(1, &m_buffer);
5022		return NO_ERROR;
5023	}
5024};
5025
5026class AdvancedDynamicPaths : public ComputeShaderBase
5027{
5028	virtual std::string Title()
5029	{
5030		return NL "Dynamic execution paths";
5031	}
5032	virtual std::string Purpose()
5033	{
5034		return NL "1. Verify case where each of the four threads takes different execution path in the CS." NL
5035				  "2. Execution path for each thread is not known at the compilation time." NL
5036				  "    Selection is made based on the result of the texture sampling." NL
5037				  "3. Verify that memory synchronization primitives (memoryBarrier* functions) are accepted" NL
5038				  "    in the control flow.";
5039	}
5040	virtual std::string Method()
5041	{
5042		return NL "1. Create and dispatch CS that takes different execution paths based on the result of the texture "
5043				  "sampling." NL "2. In each execution path use different resources (buffers, samplers, uniform "
5044				  "arrays) to compute output value.";
5045	}
5046	virtual std::string PassCriteria()
5047	{
5048		return NL "Everything works as expected.";
5049	}
5050
5051	GLuint m_program;
5052	GLuint m_buffer[4];
5053	GLuint m_texture[2];
5054
5055	virtual long Setup()
5056	{
5057		m_program = 0;
5058		memset(m_buffer, 0, sizeof(m_buffer));
5059		memset(m_texture, 0, sizeof(m_texture));
5060		return NO_ERROR;
5061	}
5062	virtual long Run()
5063	{
5064		const char* const glsl_cs =
5065			NL "layout(local_size_x = 4) in;" NL "layout(std140, binding = 0) buffer Output {" NL
5066			   "  vec4 g_output[4];" NL "};" NL "uniform isamplerBuffer g_path_buffer;" NL
5067			   "uniform vec4[4] g_input0 = vec4[4](vec4(100), vec4(200), vec4(300), vec4(400));" NL
5068			   "uniform samplerBuffer g_input1;" NL "layout(binding = 1, std430) buffer Input2 {" NL
5069			   "  vec4[4] g_input2;" NL "};" NL NL "void Path2(int id) {" NL
5070			   "  g_output[id] = texelFetch(g_input1, int(gl_LocalInvocationIndex));" NL "}" NL "void main() {" NL
5071			   "  const int id = int(gl_GlobalInvocationID.x);" NL
5072			   "  const int path = texelFetch(g_path_buffer, id).x;" NL NL "  if (path == 0) {" NL
5073			   "    g_output[id] = g_input0[gl_LocalInvocationID.x];" NL "    memoryBarrier();" NL
5074			   "  } else if (path == 1) {" NL "    return;" NL "  } else if (path == 2) {" NL "    Path2(id);" NL
5075			   "    return;" NL "  } else if (path == 3) {" NL "    g_output[id] = g_input2[path - 1];" NL
5076			   "    memoryBarrierBuffer();" NL "  }" NL "}";
5077		m_program = CreateComputeProgram(glsl_cs);
5078		glLinkProgram(m_program);
5079		if (!CheckProgram(m_program))
5080			return ERROR;
5081
5082		glGenBuffers(4, m_buffer);
5083		glGenTextures(2, m_texture);
5084
5085		/* init 'output' buffer */
5086		{
5087			std::vector<vec4> data(4, vec4(-100.0f));
5088			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer[0]);
5089			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
5090						 GL_DYNAMIC_COPY);
5091		}
5092		/* init 'input2' buffer */
5093		{
5094			const vec4 data[4] = { vec4(1.0f), vec4(2.0f), vec4(3.0f), vec4(4.0f) };
5095			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_buffer[1]);
5096			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data[0][0], GL_DYNAMIC_COPY);
5097		}
5098		/* init 'path' buffer */
5099		{
5100			const int data[4] = { 3, 2, 1, 0 };
5101			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[2]);
5102			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
5103			glBindBuffer(GL_TEXTURE_BUFFER, 0);
5104			glBindTexture(GL_TEXTURE_BUFFER, m_texture[0]);
5105			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32I, m_buffer[2]);
5106			glBindTexture(GL_TEXTURE_BUFFER, 0);
5107		}
5108		/* init 'input1' buffer */
5109		{
5110			const vec4 data[4] = { vec4(10.0f), vec4(20.0f), vec4(30.0f), vec4(40.0f) };
5111			glBindBuffer(GL_TEXTURE_BUFFER, m_buffer[3]);
5112			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
5113			glBindBuffer(GL_TEXTURE_BUFFER, 0);
5114			glBindTexture(GL_TEXTURE_BUFFER, m_texture[1]);
5115			glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, m_buffer[3]);
5116			glBindTexture(GL_TEXTURE_BUFFER, 0);
5117		}
5118
5119		glUseProgram(m_program);
5120		glUniform1i(glGetUniformLocation(m_program, "g_path_buffer"), 0);
5121		glUniform1i(glGetUniformLocation(m_program, "g_input1"), 1);
5122		glActiveTexture(GL_TEXTURE0);
5123		glBindTexture(GL_TEXTURE_BUFFER, m_texture[0]);
5124		glActiveTexture(GL_TEXTURE1);
5125		glBindTexture(GL_TEXTURE_BUFFER, m_texture[1]);
5126		glDispatchCompute(1, 1, 1);
5127		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5128
5129		/* validate 'output' buffer */
5130		{
5131			vec4 data[4];
5132			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer[0]);
5133			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data[0][0]);
5134
5135			const vec4 expected[4] = { vec4(3.0f), vec4(20.0f), vec4(-100.0f), vec4(400.0f) };
5136			for (int i = 0; i < 4; ++i)
5137			{
5138				if (!IsEqual(data[i], expected[i]))
5139				{
5140					m_context.getTestContext().getLog()
5141						<< tcu::TestLog::Message << "Invalid data at index " << i << "." << tcu::TestLog::EndMessage;
5142					return ERROR;
5143				}
5144			}
5145		}
5146		return NO_ERROR;
5147	}
5148	virtual long Cleanup()
5149	{
5150		glUseProgram(0);
5151		glDeleteProgram(m_program);
5152		glDeleteBuffers(4, m_buffer);
5153		glDeleteTextures(2, m_texture);
5154		return NO_ERROR;
5155	}
5156};
5157
5158class AdvancedResourcesMax : public ComputeShaderBase
5159{
5160	virtual std::string Title()
5161	{
5162		return NL "Maximum number of resources in one shader";
5163	}
5164	virtual std::string Purpose()
5165	{
5166		return NL "1. Verify that using 8 SSBOs, 12 UBOs, 8 atomic counters, 16 samplers" NL
5167				  "    and 8 images in one CS works as expected.";
5168	}
5169	virtual std::string Method()
5170	{
5171		return NL "Create and dispatch CS. Verify result.";
5172	}
5173	virtual std::string PassCriteria()
5174	{
5175		return NL "Everything works as expected.";
5176	}
5177
5178	GLuint m_program;
5179	GLuint m_storage_buffer[8];
5180	GLuint m_uniform_buffer[12];
5181	GLuint m_atomic_buffer[8];
5182	GLuint m_texture_buffer[16];
5183	GLuint m_texture[16];
5184	GLuint m_image_buffer[8];
5185	GLuint m_image[8];
5186
5187	bool RunIteration(GLuint index)
5188	{
5189		for (GLuint i = 0; i < 8; ++i)
5190		{
5191			const GLuint data = i + 1;
5192			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
5193			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5194		}
5195		for (GLuint i = 0; i < 12; ++i)
5196		{
5197			const GLuint data = i + 1;
5198			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
5199			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5200		}
5201		for (GLuint i = 0; i < 8; ++i)
5202		{
5203			const GLuint data = i + 1;
5204			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, i, m_atomic_buffer[i]);
5205			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5206		}
5207		for (GLuint i = 0; i < 16; ++i)
5208		{
5209			const GLuint data = i + 1;
5210			glBindBuffer(GL_TEXTURE_BUFFER, m_texture_buffer[i]);
5211			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data, GL_DYNAMIC_READ);
5212			glBindBuffer(GL_TEXTURE_BUFFER, 0);
5213
5214			glActiveTexture(GL_TEXTURE0 + i);
5215			glBindTexture(GL_TEXTURE_BUFFER, m_texture[i]);
5216			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32UI, m_texture_buffer[i]);
5217		}
5218		for (GLuint i = 0; i < 8; ++i)
5219		{
5220			const GLuint data = i + 1;
5221			glBindBuffer(GL_TEXTURE_BUFFER, m_image_buffer[i]);
5222			glBufferData(GL_TEXTURE_BUFFER, sizeof(data), &data, GL_DYNAMIC_COPY);
5223			glBindBuffer(GL_TEXTURE_BUFFER, 0);
5224
5225			glBindTexture(GL_TEXTURE_BUFFER, m_image[i]);
5226			glTexBuffer(GL_TEXTURE_BUFFER, GL_R32UI, m_image_buffer[i]);
5227			glBindTexture(GL_TEXTURE_BUFFER, 0);
5228
5229			glBindImageTexture(i, m_image[i], 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
5230		}
5231
5232		glUseProgram(m_program);
5233		glUniform1ui(glGetUniformLocation(m_program, "g_index"), index);
5234		/* uniform array */
5235		{
5236			std::vector<GLuint> data(480);
5237			for (GLuint i = 0; i < static_cast<GLuint>(data.size()); ++i)
5238				data[i]   = i + 1;
5239			glUniform1uiv(glGetUniformLocation(m_program, "g_uniform_def"), static_cast<GLsizei>(data.size()),
5240						  &data[0]);
5241		}
5242		glDispatchCompute(1, 1, 1);
5243		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5244
5245		bool result = true;
5246		/* validate buffer */
5247		{
5248			GLuint data;
5249			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[index]);
5250			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5251
5252			if (data != (index + 1) * 6)
5253			{
5254				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data << " should be "
5255													<< (index + 1) * 6 << "." << tcu::TestLog::EndMessage;
5256				result = false;
5257			}
5258			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
5259		}
5260		return result;
5261	}
5262	virtual long Setup()
5263	{
5264		m_program = 0;
5265		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
5266		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
5267		memset(m_atomic_buffer, 0, sizeof(m_atomic_buffer));
5268		memset(m_texture_buffer, 0, sizeof(m_texture_buffer));
5269		memset(m_texture, 0, sizeof(m_texture));
5270		memset(m_image_buffer, 0, sizeof(m_image_buffer));
5271		memset(m_image, 0, sizeof(m_image));
5272		return NO_ERROR;
5273	}
5274	virtual long Run()
5275	{
5276		const char* const glsl_cs =
5277			NL "layout(local_size_x = 1) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
5278			   "  uint data;" NL "} g_shader_storage[8];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
5279			   "  uint data;" NL "} g_uniform[12];" NL "layout(binding = 0) uniform usamplerBuffer g_sampler[16];" NL
5280			   "layout(binding = 0, r32ui) uniform uimageBuffer g_image[8];" NL
5281			   "layout(binding = 0, offset = 0) uniform atomic_uint g_atomic_counter0;" NL
5282			   "layout(binding = 1, offset = 0) uniform atomic_uint g_atomic_counter1;" NL
5283			   "layout(binding = 2, offset = 0) uniform atomic_uint g_atomic_counter2;" NL
5284			   "layout(binding = 3, offset = 0) uniform atomic_uint g_atomic_counter3;" NL
5285			   "layout(binding = 4, offset = 0) uniform atomic_uint g_atomic_counter4;" NL
5286			   "layout(binding = 5, offset = 0) uniform atomic_uint g_atomic_counter5;" NL
5287			   "layout(binding = 6, offset = 0) uniform atomic_uint g_atomic_counter6;" NL
5288			   "layout(binding = 7, offset = 0) uniform atomic_uint g_atomic_counter7;" NL
5289			   "uniform uint g_uniform_def[480];" NL "uniform uint g_index = 0u;" NL NL "uint Add() {" NL
5290			   "  switch (g_index) {" NL "    case 0: return atomicCounter(g_atomic_counter0);" NL
5291			   "    case 1: return atomicCounter(g_atomic_counter1);" NL
5292			   "    case 2: return atomicCounter(g_atomic_counter2);" NL
5293			   "    case 3: return atomicCounter(g_atomic_counter3);" NL
5294			   "    case 4: return atomicCounter(g_atomic_counter4);" NL
5295			   "    case 5: return atomicCounter(g_atomic_counter5);" NL
5296			   "    case 6: return atomicCounter(g_atomic_counter6);" NL
5297			   "    case 7: return atomicCounter(g_atomic_counter7);" NL "  }" NL "}" NL "void main() {" NL
5298			   "  g_shader_storage[g_index].data += g_uniform[g_index].data;" NL
5299			   "  g_shader_storage[g_index].data += texelFetch(g_sampler[g_index], 0).x;" NL
5300			   "  g_shader_storage[g_index].data += imageLoad(g_image[g_index], 0).x;" NL
5301			   "  g_shader_storage[g_index].data += Add();" NL
5302			   "  g_shader_storage[g_index].data += g_uniform_def[g_index];" NL "}";
5303		m_program = CreateComputeProgram(glsl_cs);
5304		glLinkProgram(m_program);
5305		if (!CheckProgram(m_program))
5306			return ERROR;
5307
5308		glGenBuffers(16, m_storage_buffer);
5309		glGenBuffers(12, m_uniform_buffer);
5310		glGenBuffers(8, m_atomic_buffer);
5311		glGenBuffers(16, m_texture_buffer);
5312		glGenTextures(16, m_texture);
5313		glGenBuffers(8, m_image_buffer);
5314		glGenTextures(8, m_image);
5315
5316		if (!RunIteration(0))
5317			return ERROR;
5318		if (!RunIteration(1))
5319			return ERROR;
5320		if (!RunIteration(5))
5321			return ERROR;
5322
5323		return NO_ERROR;
5324	}
5325	virtual long Cleanup()
5326	{
5327		glUseProgram(0);
5328		glDeleteProgram(m_program);
5329		glDeleteBuffers(16, m_storage_buffer);
5330		glDeleteBuffers(12, m_uniform_buffer);
5331		glDeleteBuffers(8, m_atomic_buffer);
5332		glDeleteBuffers(16, m_texture_buffer);
5333		glDeleteTextures(16, m_texture);
5334		glDeleteBuffers(8, m_image_buffer);
5335		glDeleteTextures(8, m_image);
5336		return NO_ERROR;
5337	}
5338};
5339
5340class AdvancedFP64Case1 : public ComputeShaderBase
5341{
5342	virtual std::string Title()
5343	{
5344		return NL "FP64 support - built-in math functions";
5345	}
5346	virtual std::string Purpose()
5347	{
5348		return NL "Verify that selected double precision math functions works as expected in the CS.";
5349	}
5350	virtual std::string Method()
5351	{
5352		return NL "Create and dispatch CS which uses double precision math functions. Verify results.";
5353	}
5354	virtual std::string PassCriteria()
5355	{
5356		return NL "Everything works as expected.";
5357	}
5358
5359	GLuint m_program;
5360	GLuint m_storage_buffer[4];
5361	GLuint m_uniform_buffer[2];
5362
5363	virtual long Setup()
5364	{
5365		m_program = 0;
5366		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
5367		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
5368		return NO_ERROR;
5369	}
5370	virtual long Run()
5371	{
5372		const char* const glsl_cs =
5373			NL "layout(local_size_x = 4) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
5374			   "  double data;" NL "} g_shader_storage[4];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
5375			   "  double data;" NL "} g_uniform[2];" NL "uniform dvec2 g_uniform_def;" NL NL "void main() {" NL
5376			   "  if (gl_GlobalInvocationID.x == 0) {" NL
5377			   "    g_shader_storage[0].data = floor(g_uniform[0].data + 0.1LF);" // floor(1.1LF) == 1.0LF
5378			NL "  } else if (gl_GlobalInvocationID.x == 1) {" NL
5379			   "    g_shader_storage[1].data = ceil(g_uniform[1].data + 0.2LF);" // ceil(2.2LF) == 3.0LF
5380			NL "  } else if (gl_GlobalInvocationID.x == 2) {" NL
5381			   "    g_shader_storage[2].data = min(g_uniform_def[0] + 0.1LF, 1.0LF);" // min(1.1LF, 1.0LF) == 1.0LF
5382			NL "  } else if (gl_GlobalInvocationID.x == 3) {" NL
5383			   "    g_shader_storage[3].data = max(g_uniform_def[0], g_uniform_def.y);" // max(1.0LF, 2.0LF) == 2.0LF
5384			NL "  }" NL "}";
5385		m_program = CreateComputeProgram(glsl_cs);
5386		glLinkProgram(m_program);
5387		if (!CheckProgram(m_program))
5388			return ERROR;
5389
5390		glGenBuffers(4, m_storage_buffer);
5391		for (GLuint i = 0; i < 4; ++i)
5392		{
5393			const GLdouble data = static_cast<GLdouble>(i + 1);
5394			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
5395			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5396		}
5397
5398		glGenBuffers(2, m_uniform_buffer);
5399		for (GLuint i = 0; i < 2; ++i)
5400		{
5401			const GLdouble data = static_cast<GLdouble>(i + 1);
5402			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
5403			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5404		}
5405
5406		glUseProgram(m_program);
5407		glUniform2d(glGetUniformLocation(m_program, "g_uniform_def"), 1.0, 2.0);
5408		glDispatchCompute(1, 1, 1);
5409		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5410
5411		/* validate */
5412		{
5413			const GLdouble expected[4] = { 1.0, 3.0, 1.0, 2.0 };
5414			for (int i = 0; i < 4; ++i)
5415			{
5416				GLdouble data;
5417				glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[i]);
5418				glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5419				if (data != expected[i])
5420				{
5421					m_context.getTestContext().getLog()
5422						<< tcu::TestLog::Message << "Data at index " << i << " is " << data << " should be "
5423						<< expected[i] << "." << tcu::TestLog::EndMessage;
5424					return ERROR;
5425				}
5426			}
5427		}
5428		return NO_ERROR;
5429	}
5430	virtual long Cleanup()
5431	{
5432		glUseProgram(0);
5433		glDeleteProgram(m_program);
5434		glDeleteBuffers(4, m_storage_buffer);
5435		glDeleteBuffers(2, m_uniform_buffer);
5436		return NO_ERROR;
5437	}
5438};
5439
5440class AdvancedFP64Case2 : public ComputeShaderBase
5441{
5442	virtual std::string Title()
5443	{
5444		return NL "FP64 support - uniform variables";
5445	}
5446	virtual std::string Purpose()
5447	{
5448		return NL "1. Verify that all types of double precision uniform variables work as expected in CS." NL
5449				  "2. Verify that all double precision uniform variables can be updated with Uniform* and "
5450				  "ProgramUniform* commands." NL "3. Verify that re-linking CS program works as expected.";
5451	}
5452	virtual std::string Method()
5453	{
5454		return NL "1. Create CS which uses all (double precision) types of uniform variables." NL
5455				  "2. Update uniform variables with ProgramUniform* commands." NL
5456				  "3. Verify that uniform variables were updated correctly." NL "4. Re-link CS program." NL
5457				  "5. Update uniform variables with Uniform* commands." NL
5458				  "6. Verify that uniform variables were updated correctly.";
5459	}
5460	virtual std::string PassCriteria()
5461	{
5462		return NL "Everything works as expected.";
5463	}
5464
5465	GLuint m_program;
5466	GLuint m_storage_buffer;
5467
5468	virtual long Setup()
5469	{
5470		m_program		 = 0;
5471		m_storage_buffer = 0;
5472		return NO_ERROR;
5473	}
5474	virtual long Run()
5475	{
5476		const char* const glsl_cs = NL
5477			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform double g_0;" NL
5478			"uniform dvec2 g_1;" NL "uniform dvec3 g_2;" NL "uniform dvec4 g_3;" NL "uniform dmat2 g_4;" NL
5479			"uniform dmat2x3 g_5;" NL "uniform dmat2x4 g_6;" NL "uniform dmat3x2 g_7;" NL "uniform dmat3 g_8;" NL
5480			"uniform dmat3x4 g_9;" NL "uniform dmat4x2 g_10;" NL "uniform dmat4x3 g_11;" NL "uniform dmat4 g_12;" NL NL
5481			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0LF) g_result = 0;" NL
5482			"  if (g_1 != dvec2(2.0LF, 3.0LF)) g_result = 0;" NL
5483			"  if (g_2 != dvec3(4.0LF, 5.0LF, 6.0LF)) g_result = 0;" NL
5484			"  if (g_3 != dvec4(7.0LF, 8.0LF, 9.0LF, 10.0LF)) g_result = 0;" NL NL
5485			"  if (g_4 != dmat2(11.0LF, 12.0LF, 13.0LF, 14.0LF)) g_result = 0;" NL
5486			"  if (g_5 != dmat2x3(15.0LF, 16.0LF, 17.0LF, 18.0LF, 19.0LF, 20.0LF)) g_result = 0;" NL
5487			"  if (g_6 != dmat2x4(21.0LF, 22.0LF, 23.0LF, 24.0LF, 25.0LF, 26.0LF, 27.0LF, 28.0LF)) g_result = 0;" NL NL
5488			"  if (g_7 != dmat3x2(29.0LF, 30.0LF, 31.0LF, 32.0LF, 33.0LF, 34.0LF)) g_result = 0;" NL
5489			"  if (g_8 != dmat3(35.0LF, 36.0LF, 37.0LF, 38.0LF, 39.0LF, 40.0LF, 41.0LF, 42.0LF, 43.0LF)) g_result = "
5490			"0;" NL "  if (g_9 != dmat3x4(44.0LF, 45.0LF, 46.0LF, 47.0LF, 48.0LF, 49.0LF, 50.0LF, 51.0LF, 52.0LF, "
5491			"53.0LF, 54.0LF, 55.0LF)) g_result = 0;" NL NL
5492			"  if (g_10 != dmat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
5493			"  if (g_11 != dmat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
5494			"0;" NL "  if (g_12 != dmat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
5495			"88.0, 89.0, 90.0)) g_result = 0;" NL "}";
5496		m_program = CreateComputeProgram(glsl_cs);
5497		glLinkProgram(m_program);
5498		if (!CheckProgram(m_program))
5499			return ERROR;
5500
5501		glGenBuffers(1, &m_storage_buffer);
5502		/* create buffer */
5503		{
5504			const int data = 123;
5505			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5506			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5507		}
5508
5509		glProgramUniform1d(m_program, glGetUniformLocation(m_program, "g_0"), 1.0);
5510		glProgramUniform2d(m_program, glGetUniformLocation(m_program, "g_1"), 2.0, 3.0);
5511		glProgramUniform3d(m_program, glGetUniformLocation(m_program, "g_2"), 4.0, 5.0, 6.0);
5512		glProgramUniform4d(m_program, glGetUniformLocation(m_program, "g_3"), 7.0, 8.0, 9.0, 10.0);
5513
5514		/* mat2 */
5515		{
5516			const GLdouble value[4] = { 11.0, 12.0, 13.0, 14.0 };
5517			glProgramUniformMatrix2dv(m_program, glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
5518		}
5519		/* mat2x3 */
5520		{
5521			const GLdouble value[6] = { 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 };
5522			glProgramUniformMatrix2x3dv(m_program, glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
5523		}
5524		/* mat2x4 */
5525		{
5526			const GLdouble value[8] = { 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0 };
5527			glProgramUniformMatrix2x4dv(m_program, glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
5528		}
5529
5530		/* mat3x2 */
5531		{
5532			const GLdouble value[6] = { 29.0, 30.0, 31.0, 32.0, 33.0, 34.0 };
5533			glProgramUniformMatrix3x2dv(m_program, glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
5534		}
5535		/* mat3 */
5536		{
5537			const GLdouble value[9] = { 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0 };
5538			glProgramUniformMatrix3dv(m_program, glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
5539		}
5540		/* mat3x4 */
5541		{
5542			const GLdouble value[12] = { 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0 };
5543			glProgramUniformMatrix3x4dv(m_program, glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
5544		}
5545
5546		/* mat4x2 */
5547		{
5548			const GLdouble value[8] = { 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0 };
5549			glProgramUniformMatrix4x2dv(m_program, glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
5550		}
5551		/* mat4x3 */
5552		{
5553			const GLdouble value[12] = { 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0 };
5554			glProgramUniformMatrix4x3dv(m_program, glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
5555		}
5556		/* mat4 */
5557		{
5558			const GLdouble value[16] = { 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0,
5559										 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0 };
5560			glProgramUniformMatrix4dv(m_program, glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
5561		}
5562
5563		glUseProgram(m_program);
5564		glDispatchCompute(1, 1, 1);
5565		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5566
5567		/* validate */
5568		{
5569			int data;
5570			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5571			if (data != 1)
5572			{
5573				m_context.getTestContext().getLog()
5574					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
5575				return ERROR;
5576			}
5577		}
5578
5579		// re-link program (all uniforms will be set to zero)
5580		glLinkProgram(m_program);
5581
5582		/* clear buffer */
5583		{
5584			const int data = 123;
5585			glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5586		}
5587
5588		glUniform1d(glGetUniformLocation(m_program, "g_0"), 1.0);
5589		glUniform2d(glGetUniformLocation(m_program, "g_1"), 2.0, 3.0);
5590		glUniform3d(glGetUniformLocation(m_program, "g_2"), 4.0, 5.0, 6.0);
5591		glUniform4d(glGetUniformLocation(m_program, "g_3"), 7.0, 8.0, 9.0, 10.0);
5592
5593		/* mat2 */
5594		{
5595			const GLdouble value[4] = { 11.0, 12.0, 13.0, 14.0 };
5596			glUniformMatrix2dv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
5597		}
5598		/* mat2x3 */
5599		{
5600			const GLdouble value[6] = { 15.0, 16.0, 17.0, 18.0, 19.0, 20.0 };
5601			glUniformMatrix2x3dv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
5602		}
5603		/* mat2x4 */
5604		{
5605			const GLdouble value[8] = { 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0 };
5606			glUniformMatrix2x4dv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
5607		}
5608
5609		/* mat3x2 */
5610		{
5611			const GLdouble value[6] = { 29.0, 30.0, 31.0, 32.0, 33.0, 34.0 };
5612			glUniformMatrix3x2dv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
5613		}
5614		/* mat3 */
5615		{
5616			const GLdouble value[9] = { 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0 };
5617			glUniformMatrix3dv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
5618		}
5619		/* mat3x4 */
5620		{
5621			const GLdouble value[12] = { 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0 };
5622			glUniformMatrix3x4dv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
5623		}
5624
5625		/* mat4x2 */
5626		{
5627			const GLdouble value[8] = { 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0 };
5628			glUniformMatrix4x2dv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
5629		}
5630		/* mat4x3 */
5631		{
5632			const GLdouble value[12] = { 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0 };
5633			glUniformMatrix4x3dv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
5634		}
5635		/* mat4 */
5636		{
5637			const GLdouble value[16] = { 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0,
5638										 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0 };
5639			glUniformMatrix4dv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
5640		}
5641
5642		glDispatchCompute(1, 1, 1);
5643		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5644
5645		/* validate */
5646		{
5647			int data;
5648			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5649			if (data != 1)
5650			{
5651				m_context.getTestContext().getLog()
5652					<< tcu::TestLog::Message << "Data is " << data << " should be 1." << tcu::TestLog::EndMessage;
5653				return ERROR;
5654			}
5655		}
5656
5657		return NO_ERROR;
5658	}
5659	virtual long Cleanup()
5660	{
5661		glUseProgram(0);
5662		glDeleteProgram(m_program);
5663		glDeleteBuffers(1, &m_storage_buffer);
5664		return NO_ERROR;
5665	}
5666};
5667
5668class AdvancedFP64Case3 : public ComputeShaderBase
5669{
5670	virtual std::string Title()
5671	{
5672		return NL "FP64 support - subroutines";
5673	}
5674	virtual std::string Purpose()
5675	{
5676		return NL "Verify that subroutines that performs double precision computation works as expected in the CS.";
5677	}
5678	virtual std::string Method()
5679	{
5680		return NL
5681			"Create and dispatch CS that uses double precision math functions in subroutines to compute output values.";
5682	}
5683	virtual std::string PassCriteria()
5684	{
5685		return NL "Everything works as expected.";
5686	}
5687
5688	GLuint m_program;
5689	GLuint m_storage_buffer;
5690
5691	virtual long Setup()
5692	{
5693		m_program		 = 0;
5694		m_storage_buffer = 0;
5695		return NO_ERROR;
5696	}
5697	virtual long Run()
5698	{
5699		const char* const glsl_cs =
5700			NL "layout(local_size_x = 1) in;" NL "uniform double[4] g_input;" NL "uniform int index;" NL
5701			   "layout(std430, binding = 0) buffer Output {" NL "  double g_output[4];" NL "};" NL
5702			   "subroutine double MathFunc(double x);" NL "subroutine uniform MathFunc g_func[4];" NL
5703			   "subroutine(MathFunc)" NL "double Func0(double x) {" NL "  return abs(x);" // abs(-1.0LF) == 1.0LF
5704			NL "}" NL "subroutine(MathFunc)" NL "double Func1(double x) {" NL
5705			   "  return round(x);" // round(2.2LF) == 2.0LF
5706			NL "}" NL "subroutine(MathFunc)" NL "double Func2(double x) {" NL
5707			   "  return sign(x);" // sign(3.0LF) == 1.0LF
5708			NL "}" NL "subroutine(MathFunc)" NL "double Func3(double x) {" NL
5709			   "  return fract(x);" // fract(4.1LF) == 0.1LF
5710			NL "}" NL "void main() {" NL "  int i = index;" NL "  g_output[i] = g_func[i](g_input[i]);" NL "}";
5711		m_program = CreateComputeProgram(glsl_cs);
5712		glLinkProgram(m_program);
5713		if (!CheckProgram(m_program))
5714			return ERROR;
5715
5716		glGenBuffers(1, &m_storage_buffer);
5717		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5718		glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(double), NULL, GL_STATIC_DRAW);
5719
5720		const GLuint index_compute0 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func0");
5721		const GLuint index_compute1 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func1");
5722		const GLuint index_compute2 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func2");
5723		const GLuint index_compute3 = glGetSubroutineIndex(m_program, GL_COMPUTE_SHADER, "Func3");
5724		const GLint  loc_compute0   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[0]");
5725		const GLint  loc_compute1   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[1]");
5726		const GLint  loc_compute2   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[2]");
5727		const GLint  loc_compute3   = glGetSubroutineUniformLocation(m_program, GL_COMPUTE_SHADER, "g_func[3]");
5728
5729		glUseProgram(m_program);
5730
5731		// setup subroutines
5732		GLuint indices[4];
5733		indices[loc_compute0] = index_compute0;
5734		indices[loc_compute1] = index_compute1;
5735		indices[loc_compute2] = index_compute2;
5736		indices[loc_compute3] = index_compute3;
5737		glUniformSubroutinesuiv(GL_COMPUTE_SHADER, 4, indices);
5738
5739		/* set uniforms */
5740		{
5741			const GLdouble data[4] = { -1.0, 2.2, 3.0, 4.1 };
5742			glUniform1dv(glGetUniformLocation(m_program, "g_input"), 4, data);
5743		}
5744		glUniform1i(glGetUniformLocation(m_program, "index"), 0);
5745		glDispatchCompute(1, 1, 1);
5746		glUniform1i(glGetUniformLocation(m_program, "index"), 1);
5747		glDispatchCompute(1, 1, 1);
5748		glUniform1i(glGetUniformLocation(m_program, "index"), 2);
5749		glDispatchCompute(1, 1, 1);
5750		glUniform1i(glGetUniformLocation(m_program, "index"), 3);
5751		glDispatchCompute(1, 1, 1);
5752		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5753
5754		/* validate */
5755		{
5756			const GLdouble expected[4] = { 1.0, 2.0, 1.0, 0.1 };
5757			GLdouble	   data[4];
5758			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
5759			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5760			for (int i = 0; i < 4; ++i)
5761			{
5762				if (fabs(data[i] - expected[i]) > g_color_eps.x())
5763				{
5764					m_context.getTestContext().getLog()
5765						<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be "
5766						<< expected[i] << "." << tcu::TestLog::EndMessage;
5767					return ERROR;
5768				}
5769			}
5770		}
5771		return NO_ERROR;
5772	}
5773	virtual long Cleanup()
5774	{
5775		glUseProgram(0);
5776		glDeleteProgram(m_program);
5777		glDeleteBuffers(1, &m_storage_buffer);
5778		return NO_ERROR;
5779	}
5780};
5781
5782class AdvancedConditionalDispatching : public ComputeShaderBase
5783{
5784	virtual std::string Title()
5785	{
5786		return NL "Conditional Dispatching";
5787	}
5788	virtual std::string Purpose()
5789	{
5790		return NL "Verify that DispatchCompute and DispatchComputeIndirect commands work as expected inside "
5791				  "conditional blocks.";
5792	}
5793	virtual std::string Method()
5794	{
5795		return NL "1. Render two quads. One will pass depth-test and the second one will not." NL
5796				  "2. Use GL_ANY_SAMPLES_PASSED query objects to 'remember' these results." NL
5797				  "3. Use DispatchCompute and DispatchComputeIndirect commands inside conditional blocks using both "
5798				  "query objects." NL
5799				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands are only executed in" NL
5800				  "    the conditional block that uses query object that has passed depth-test.";
5801	}
5802	virtual std::string PassCriteria()
5803	{
5804		return NL "Everything works as expected.";
5805	}
5806
5807	GLuint m_program_vsfs;
5808	GLuint m_program_cs;
5809	GLuint m_vertex_array;
5810	GLuint m_query[2];
5811	GLuint m_storage_buffer;
5812	GLuint m_dispatch_buffer;
5813
5814	virtual long Setup()
5815	{
5816		m_program_vsfs = 0;
5817		m_program_cs   = 0;
5818		m_vertex_array = 0;
5819		memset(m_query, 0, sizeof(m_query));
5820		m_storage_buffer  = 0;
5821		m_dispatch_buffer = 0;
5822		return NO_ERROR;
5823	}
5824	virtual long Run()
5825	{
5826		const char* const glsl_vs = NL
5827			"uniform float g_depth;" NL "uniform vec2[3] g_vertex = vec2[3](vec2(-1, -1), vec2(3, -1), vec2(-1, 3));" NL
5828			"void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], g_depth, 1);" NL "}";
5829
5830		const char* const glsl_fs =
5831			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(0, 1, 0, 1);" NL "}";
5832
5833		m_program_vsfs = CreateProgram(glsl_vs, glsl_fs);
5834		glLinkProgram(m_program_vsfs);
5835		if (!CheckProgram(m_program_vsfs))
5836			return ERROR;
5837
5838		const char* const glsl_cs =
5839			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
5840			   "void main() {" NL "  atomicAdd(g_output, 1);" NL "}";
5841		m_program_cs = CreateComputeProgram(glsl_cs);
5842		glLinkProgram(m_program_cs);
5843		if (!CheckProgram(m_program_cs))
5844			return ERROR;
5845
5846		/* create storage buffer */
5847		{
5848			const int data = 0;
5849			glGenBuffers(1, &m_storage_buffer);
5850			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5851			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_DYNAMIC_COPY);
5852		}
5853		/* create dispatch buffer */
5854		{
5855			const GLuint data[3] = { 2, 2, 2 };
5856			glGenBuffers(1, &m_dispatch_buffer);
5857			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
5858			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
5859		}
5860
5861		glGenVertexArrays(1, &m_vertex_array);
5862		glGenQueries(2, m_query);
5863
5864		glEnable(GL_DEPTH_TEST);
5865		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
5866
5867		glUseProgram(m_program_vsfs);
5868		glBindVertexArray(m_vertex_array);
5869
5870		// this draw call will pass depth test
5871		glBeginQuery(GL_ANY_SAMPLES_PASSED, m_query[0]);
5872		glUniform1f(glGetUniformLocation(m_program_vsfs, "g_depth"), 0.0f);
5873		glDrawArrays(GL_TRIANGLES, 0, 3);
5874		glEndQuery(GL_ANY_SAMPLES_PASSED);
5875
5876		// this draw call will NOT pass depth test
5877		glBeginQuery(GL_ANY_SAMPLES_PASSED, m_query[1]);
5878		glUniform1f(glGetUniformLocation(m_program_vsfs, "g_depth"), 0.5f);
5879		glDrawArrays(GL_TRIANGLES, 0, 3);
5880		glEndQuery(GL_ANY_SAMPLES_PASSED);
5881
5882		glDisable(GL_DEPTH_TEST);
5883
5884		glUseProgram(m_program_cs);
5885
5886		// these commands should be executed normally
5887		glBeginConditionalRender(m_query[0], GL_QUERY_WAIT);
5888		glDispatchCompute(2, 2, 2);
5889		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
5890		glDispatchComputeIndirect(0);
5891		glEndConditionalRender();
5892
5893		/* validate */
5894		{
5895			int data;
5896			glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
5897			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5898			if (data != 16)
5899			{
5900				m_context.getTestContext().getLog()
5901					<< tcu::TestLog::Message << "Data is " << data << " should be 16." << tcu::TestLog::EndMessage;
5902				return ERROR;
5903			}
5904		}
5905
5906		// these commands should be discarded
5907		glBeginConditionalRender(m_query[1], GL_QUERY_WAIT);
5908		glDispatchCompute(2, 2, 2);
5909		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
5910		glDispatchComputeIndirect(0);
5911		glEndConditionalRender();
5912
5913		/* validate */
5914		{
5915			int data;
5916			glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
5917			glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(data), &data);
5918			if (data != 16 && m_context.getRenderContext().getRenderTarget().getDepthBits() != 0)
5919			{
5920				m_context.getTestContext().getLog()
5921					<< tcu::TestLog::Message << "Data is " << data << " should be 16." << tcu::TestLog::EndMessage;
5922				return ERROR;
5923			}
5924			else if (data != 32 && m_context.getRenderContext().getRenderTarget().getDepthBits() == 0)
5925			{
5926				m_context.getTestContext().getLog()
5927					<< tcu::TestLog::Message << "Data is " << data << " should be 32." << tcu::TestLog::EndMessage;
5928				return ERROR;
5929			}
5930		}
5931
5932		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
5933		{
5934			return ERROR;
5935		}
5936
5937		return NO_ERROR;
5938	}
5939	virtual long Cleanup()
5940	{
5941		glUseProgram(0);
5942		glDeleteProgram(m_program_vsfs);
5943		glDeleteProgram(m_program_cs);
5944		glDeleteVertexArrays(1, &m_vertex_array);
5945		glDeleteQueries(2, m_query);
5946		glDeleteBuffers(1, &m_storage_buffer);
5947		glDeleteBuffers(1, &m_dispatch_buffer);
5948		return NO_ERROR;
5949	}
5950};
5951
5952class NegativeAPINoActiveProgram : public ComputeShaderBase
5953{
5954	virtual std::string Title()
5955	{
5956		return NL "API errors - no active program";
5957	}
5958	virtual std::string Purpose()
5959	{
5960		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5961	}
5962	virtual std::string Method()
5963	{
5964		return NL "";
5965	}
5966	virtual std::string PassCriteria()
5967	{
5968		return NL "";
5969	}
5970
5971	GLuint m_program;
5972
5973	virtual long Setup()
5974	{
5975		m_program = 0;
5976		return NO_ERROR;
5977	}
5978	virtual long Run()
5979	{
5980		glDispatchCompute(1, 2, 3);
5981		if (glGetError() != GL_INVALID_OPERATION)
5982		{
5983			m_context.getTestContext().getLog()
5984				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
5985				<< "DispatchComputeIndirect if there is no active program for the compute\n"
5986				<< "shader stage." << tcu::TestLog::EndMessage;
5987			return ERROR;
5988		}
5989
5990		/* indirect dispatch */
5991		{
5992			GLuint		 buffer;
5993			const GLuint num_group[3] = { 3, 2, 1 };
5994			glGenBuffers(1, &buffer);
5995			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
5996			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
5997			glDispatchComputeIndirect(0);
5998			glDeleteBuffers(1, &buffer);
5999			if (glGetError() != GL_INVALID_OPERATION)
6000			{
6001				m_context.getTestContext().getLog()
6002					<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
6003					<< "DispatchComputeIndirect if there is no active program for the compute\n"
6004					<< "shader stage." << tcu::TestLog::EndMessage;
6005				return ERROR;
6006			}
6007		}
6008
6009		const char* const glsl_vs =
6010			NL "layout(location = 0) in vec4 g_position;" NL "void main() {" NL "  gl_Position = g_position;" NL "}";
6011
6012		const char* const glsl_fs =
6013			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
6014
6015		m_program = CreateProgram(glsl_vs, glsl_fs);
6016		glLinkProgram(m_program);
6017		if (!CheckProgram(m_program))
6018			return ERROR;
6019
6020		glUseProgram(m_program);
6021
6022		glDispatchCompute(1, 2, 3);
6023		if (glGetError() != GL_INVALID_OPERATION)
6024		{
6025			m_context.getTestContext().getLog()
6026				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
6027				<< "DispatchComputeIndirect if there is no active program for the compute\n"
6028				<< "shader stage." << tcu::TestLog::EndMessage;
6029			return ERROR;
6030		}
6031
6032		/* indirect dispatch */
6033		{
6034			GLuint		 buffer;
6035			const GLuint num_group[3] = { 3, 2, 1 };
6036			glGenBuffers(1, &buffer);
6037			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
6038			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
6039			glDispatchComputeIndirect(0);
6040			glDeleteBuffers(1, &buffer);
6041			if (glGetError() != GL_INVALID_OPERATION)
6042			{
6043				m_context.getTestContext().getLog()
6044					<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
6045					<< "DispatchComputeIndirect if there is no active program for the compute\n"
6046					<< "shader stage." << tcu::TestLog::EndMessage;
6047				return ERROR;
6048			}
6049		}
6050
6051		return NO_ERROR;
6052	}
6053	virtual long Cleanup()
6054	{
6055		glUseProgram(0);
6056		glDeleteProgram(m_program);
6057		return NO_ERROR;
6058	}
6059};
6060
6061class NegativeAPIWorkGroupCount : public ComputeShaderBase
6062{
6063	virtual std::string Title()
6064	{
6065		return NL "API errors - invalid work group count";
6066	}
6067	virtual std::string Purpose()
6068	{
6069		return NL "Verify that appropriate errors are generated by the OpenGL API.";
6070	}
6071	virtual std::string Method()
6072	{
6073		return NL "";
6074	}
6075	virtual std::string PassCriteria()
6076	{
6077		return NL "";
6078	}
6079
6080	GLuint m_program;
6081	GLuint m_storage_buffer;
6082
6083	virtual long Setup()
6084	{
6085		m_program		 = 0;
6086		m_storage_buffer = 0;
6087		return NO_ERROR;
6088	}
6089	virtual long Run()
6090	{
6091		const char* const glsl_cs =
6092			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
6093			   "void main() {" NL
6094			   "  g_output[gl_GlobalInvocationID.x * gl_GlobalInvocationID.y * gl_GlobalInvocationID.z] = 0;" NL "}";
6095		m_program = CreateComputeProgram(glsl_cs);
6096		glLinkProgram(m_program);
6097		if (!CheckProgram(m_program))
6098			return ERROR;
6099
6100		glGenBuffers(1, &m_storage_buffer);
6101		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
6102		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
6103
6104		GLint x, y, z;
6105		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &x);
6106		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &y);
6107		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &z);
6108
6109		glUseProgram(m_program);
6110
6111		glDispatchCompute(x + 1, 1, 1);
6112		if (glGetError() != GL_INVALID_VALUE)
6113		{
6114			m_context.getTestContext().getLog()
6115				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
6116				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
6117				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
6118			return ERROR;
6119		}
6120
6121		glDispatchCompute(1, y + 1, 1);
6122		if (glGetError() != GL_INVALID_VALUE)
6123		{
6124			m_context.getTestContext().getLog()
6125				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
6126				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
6127				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
6128			return ERROR;
6129		}
6130
6131		glDispatchCompute(1, 1, z + 1);
6132		if (glGetError() != GL_INVALID_VALUE)
6133		{
6134			m_context.getTestContext().getLog()
6135				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
6136				<< "<num_groups_y> or <num_groups_z> is greater than the value of\n"
6137				<< "MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension." << tcu::TestLog::EndMessage;
6138			return ERROR;
6139		}
6140
6141		return NO_ERROR;
6142	}
6143	virtual long Cleanup()
6144	{
6145		glUseProgram(0);
6146		glDeleteProgram(m_program);
6147		glDeleteBuffers(1, &m_storage_buffer);
6148		return NO_ERROR;
6149	}
6150};
6151
6152class NegativeAPIIndirect : public ComputeShaderBase
6153{
6154	virtual std::string Title()
6155	{
6156		return NL "API errors - incorrect DispatchComputeIndirect usage";
6157	}
6158	virtual std::string Purpose()
6159	{
6160		return NL "Verify that appropriate errors are generated by the OpenGL API.";
6161	}
6162	virtual std::string Method()
6163	{
6164		return NL "";
6165	}
6166	virtual std::string PassCriteria()
6167	{
6168		return NL "";
6169	}
6170
6171	GLuint m_program;
6172	GLuint m_storage_buffer;
6173	GLuint m_dispatch_buffer;
6174
6175	virtual long Setup()
6176	{
6177		m_program		  = 0;
6178		m_storage_buffer  = 0;
6179		m_dispatch_buffer = 0;
6180		return NO_ERROR;
6181	}
6182
6183	virtual long Run()
6184	{
6185		const char* const glsl_cs =
6186			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
6187			   "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
6188		m_program = CreateComputeProgram(glsl_cs);
6189		glLinkProgram(m_program);
6190		if (!CheckProgram(m_program))
6191			return ERROR;
6192
6193		glGenBuffers(1, &m_storage_buffer);
6194		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
6195		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
6196
6197		const GLuint num_groups[6] = { 1, 1, 1, 1, 1, 1 };
6198		glGenBuffers(1, &m_dispatch_buffer);
6199		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
6200		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_COPY);
6201
6202		glUseProgram(m_program);
6203
6204		glDispatchComputeIndirect(-2);
6205		if (glGetError() != GL_INVALID_VALUE)
6206		{
6207			m_context.getTestContext().getLog()
6208				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
6209				<< "less than zero or not a multiple of four." << tcu::TestLog::EndMessage;
6210			return ERROR;
6211		}
6212
6213		glDispatchComputeIndirect(3);
6214		if (glGetError() != GL_INVALID_VALUE)
6215		{
6216			m_context.getTestContext().getLog()
6217				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
6218				<< "less than zero or not a multiple of four." << tcu::TestLog::EndMessage;
6219			return ERROR;
6220		}
6221
6222		glDispatchComputeIndirect(16);
6223		if (glGetError() != GL_INVALID_OPERATION)
6224		{
6225			m_context.getTestContext().getLog()
6226				<< tcu::TestLog::Message
6227				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
6228				<< "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
6229				<< "beyond the end of the bound buffer object." << tcu::TestLog::EndMessage;
6230			return ERROR;
6231		}
6232
6233		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
6234		glDispatchComputeIndirect(0);
6235		if (glGetError() != GL_INVALID_OPERATION)
6236		{
6237			m_context.getTestContext().getLog()
6238				<< tcu::TestLog::Message
6239				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
6240				<< "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
6241				<< "beyond the end of the bound buffer object." << tcu::TestLog::EndMessage;
6242			return ERROR;
6243		}
6244
6245		return NO_ERROR;
6246	}
6247	virtual long Cleanup()
6248	{
6249		glUseProgram(0);
6250		glDeleteProgram(m_program);
6251		glDeleteBuffers(1, &m_storage_buffer);
6252		glDeleteBuffers(1, &m_dispatch_buffer);
6253		return NO_ERROR;
6254	}
6255};
6256
6257class NegativeAPIProgram : public ComputeShaderBase
6258{
6259	virtual std::string Title()
6260	{
6261		return NL "API errors - program state";
6262	}
6263	virtual std::string Purpose()
6264	{
6265		return NL "Verify that appropriate errors are generated by the OpenGL API.";
6266	}
6267	virtual std::string Method()
6268	{
6269		return NL "";
6270	}
6271	virtual std::string PassCriteria()
6272	{
6273		return NL "";
6274	}
6275
6276	GLuint m_program;
6277	GLuint m_storage_buffer;
6278
6279	virtual long Setup()
6280	{
6281		m_program		 = 0;
6282		m_storage_buffer = 0;
6283		return NO_ERROR;
6284	}
6285	virtual long Run()
6286	{
6287		const char* const glsl_vs =
6288			NL "layout(location = 0) in vec4 g_position;" NL "void main() {" NL "  gl_Position = g_position;" NL "}";
6289
6290		const char* const glsl_fs =
6291			NL "layout(location = 0) out vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
6292		m_program = CreateProgram(glsl_vs, glsl_fs);
6293
6294		GLint v[3];
6295		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
6296		if (glGetError() != GL_INVALID_OPERATION)
6297		{
6298			m_context.getTestContext().getLog()
6299				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
6300				<< "COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
6301				<< "successfully, or has been linked but contains no compute shaders." << tcu::TestLog::EndMessage;
6302			return ERROR;
6303		}
6304
6305		glLinkProgram(m_program);
6306		if (!CheckProgram(m_program))
6307			return ERROR;
6308
6309		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
6310		if (glGetError() != GL_INVALID_OPERATION)
6311		{
6312			m_context.getTestContext().getLog()
6313				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
6314				<< "COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
6315				<< "successfully, or has been linked but contains no compute shaders." << tcu::TestLog::EndMessage;
6316			return ERROR;
6317		}
6318		glDeleteProgram(m_program);
6319
6320		const char* const glsl_cs =
6321			"#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
6322			"  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
6323		m_program = glCreateProgram();
6324
6325		GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
6326		glAttachShader(m_program, sh);
6327		glDeleteShader(sh);
6328		glShaderSource(sh, 1, &glsl_cs, NULL);
6329		glCompileShader(sh);
6330
6331		sh = glCreateShader(GL_VERTEX_SHADER);
6332		glAttachShader(m_program, sh);
6333		glDeleteShader(sh);
6334		glShaderSource(sh, 1, &glsl_vs, NULL);
6335		glCompileShader(sh);
6336
6337		sh = glCreateShader(GL_FRAGMENT_SHADER);
6338		glAttachShader(m_program, sh);
6339		glDeleteShader(sh);
6340		glShaderSource(sh, 1, &glsl_fs, NULL);
6341		glCompileShader(sh);
6342
6343		glLinkProgram(m_program);
6344		GLint status;
6345		glGetProgramiv(m_program, GL_LINK_STATUS, &status);
6346		if (status == GL_TRUE)
6347		{
6348			m_context.getTestContext().getLog()
6349				<< tcu::TestLog::Message << "LinkProgram will fail if <program> contains a combination"
6350				<< " of compute and\n non-compute shaders.\n"
6351				<< tcu::TestLog::EndMessage;
6352			return ERROR;
6353		}
6354
6355		return NO_ERROR;
6356	}
6357	virtual long Cleanup()
6358	{
6359		glUseProgram(0);
6360		glDeleteProgram(m_program);
6361		glDeleteBuffers(1, &m_storage_buffer);
6362		return NO_ERROR;
6363	}
6364};
6365
6366class NegativeGLSLCompileTimeErrors : public ComputeShaderBase
6367{
6368	virtual std::string Title()
6369	{
6370		return NL "Compile-time errors";
6371	}
6372	virtual std::string Purpose()
6373	{
6374		return NL "Verify that appropriate errors are generated by the GLSL compiler.";
6375	}
6376	virtual std::string Method()
6377	{
6378		return NL "";
6379	}
6380	virtual std::string PassCriteria()
6381	{
6382		return NL "";
6383	}
6384
6385	static std::string Shader1(int x, int y, int z)
6386	{
6387		std::stringstream ss;
6388		ss << "#version 430 core" NL "layout(local_size_x = " << x << ", local_size_y = " << y
6389		   << ", local_size_z = " << z << ") in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
6390										  "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
6391		return ss.str();
6392	}
6393	virtual long Run()
6394	{
6395		// gl_GlobalInvocationID requires "#version 430" or later or GL_ARB_compute_shader
6396		// extension enabled
6397		if (!Compile("#version 420 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
6398					 "  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL
6399					 "}"))
6400			return ERROR;
6401
6402		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(local_size_x = 2) in;" NL
6403					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
6404					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
6405			return ERROR;
6406
6407		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "in uint x;" NL
6408					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
6409					 "  g_output[gl_GlobalInvocationID.x] = x;" NL "}"))
6410			return ERROR;
6411
6412		if (!Compile("#version 430 core" NL "layout(local_size_x = 1) in;" NL "out uint x;" NL
6413					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
6414					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "  x = 0;" NL "}"))
6415			return ERROR;
6416
6417		{
6418			GLint x, y, z;
6419			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &x);
6420			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &y);
6421			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &z);
6422
6423			if (!Compile(Shader1(x + 1, 1, 1)))
6424				return ERROR;
6425			if (!Compile(Shader1(1, y + 1, 1)))
6426				return ERROR;
6427			if (!Compile(Shader1(1, 1, z + 1)))
6428				return ERROR;
6429		}
6430
6431		return NO_ERROR;
6432	}
6433
6434	bool Compile(const std::string& source)
6435	{
6436		const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
6437
6438		const char* const src = source.c_str();
6439		glShaderSource(sh, 1, &src, NULL);
6440		glCompileShader(sh);
6441
6442		GLchar log[1024];
6443		glGetShaderInfoLog(sh, sizeof(log), NULL, log);
6444		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Shader Info Log:\n"
6445											<< log << tcu::TestLog::EndMessage;
6446
6447		GLint status;
6448		glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
6449		glDeleteShader(sh);
6450
6451		if (status == GL_TRUE)
6452		{
6453			m_context.getTestContext().getLog()
6454				<< tcu::TestLog::Message << "Compilation should fail." << tcu::TestLog::EndMessage;
6455			return false;
6456		}
6457
6458		return true;
6459	}
6460};
6461
6462class NegativeGLSLLinkTimeErrors : public ComputeShaderBase
6463{
6464	virtual std::string Title()
6465	{
6466		return NL "Link-time errors";
6467	}
6468	virtual std::string Purpose()
6469	{
6470		return NL "Verify that appropriate errors are generated by the GLSL linker.";
6471	}
6472	virtual std::string Method()
6473	{
6474		return NL "";
6475	}
6476	virtual std::string PassCriteria()
6477	{
6478		return NL "";
6479	}
6480
6481	virtual long Run()
6482	{
6483		// no layout
6484		if (!Link("#version 430 core" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
6485				  "#version 430 core" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
6486				  "void Run() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
6487			return ERROR;
6488
6489		if (!Link("#version 430 core" NL "layout(local_size_x = 2) in;" NL "void Run();" NL "void main() {" NL
6490				  "  Run();" NL "}",
6491				  "#version 430 core" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
6492				  "  uint g_output[];" NL "};" NL "void Run() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
6493			return ERROR;
6494
6495		return NO_ERROR;
6496	}
6497
6498	bool Link(const std::string& cs0, const std::string& cs1)
6499	{
6500		const GLuint p = glCreateProgram();
6501
6502		/* shader 0 */
6503		{
6504			GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
6505			glAttachShader(p, sh);
6506			glDeleteShader(sh);
6507			const char* const src = cs0.c_str();
6508			glShaderSource(sh, 1, &src, NULL);
6509			glCompileShader(sh);
6510
6511			GLint status;
6512			glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
6513			if (status == GL_FALSE)
6514			{
6515				glDeleteProgram(p);
6516				m_context.getTestContext().getLog()
6517					<< tcu::TestLog::Message << "CS0 compilation should be ok." << tcu::TestLog::EndMessage;
6518				return false;
6519			}
6520		}
6521		/* shader 1 */
6522		{
6523			GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
6524			glAttachShader(p, sh);
6525			glDeleteShader(sh);
6526			const char* const src = cs1.c_str();
6527			glShaderSource(sh, 1, &src, NULL);
6528			glCompileShader(sh);
6529
6530			GLint status;
6531			glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
6532			if (status == GL_FALSE)
6533			{
6534				glDeleteProgram(p);
6535				m_context.getTestContext().getLog()
6536					<< tcu::TestLog::Message << "CS1 compilation should be ok." << tcu::TestLog::EndMessage;
6537				return false;
6538			}
6539		}
6540
6541		glLinkProgram(p);
6542
6543		GLchar log[1024];
6544		glGetProgramInfoLog(p, sizeof(log), NULL, log);
6545		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Program Info Log:\n"
6546											<< log << tcu::TestLog::EndMessage;
6547
6548		GLint status;
6549		glGetProgramiv(p, GL_LINK_STATUS, &status);
6550		glDeleteProgram(p);
6551
6552		if (status == GL_TRUE)
6553		{
6554			m_context.getTestContext().getLog()
6555				<< tcu::TestLog::Message << "Link operation should fail." << tcu::TestLog::EndMessage;
6556			return false;
6557		}
6558
6559		return true;
6560	}
6561};
6562
6563class BasicWorkGroupSizeIsConst : public ComputeShaderBase
6564{
6565	virtual std::string Title()
6566	{
6567		return NL "gl_WorkGroupSize is an constant";
6568	}
6569	virtual std::string Purpose()
6570	{
6571		return NL "Verify that gl_WorkGroupSize can be used as an constant expression.";
6572	}
6573	virtual std::string Method()
6574	{
6575		return NL "";
6576	}
6577	virtual std::string PassCriteria()
6578	{
6579		return NL "";
6580	}
6581
6582	GLuint m_program;
6583	GLuint m_storage_buffer;
6584
6585	virtual long Setup()
6586	{
6587		m_program		 = 0;
6588		m_storage_buffer = 0;
6589		return NO_ERROR;
6590	}
6591
6592	virtual long Run()
6593	{
6594		const char* const glsl_cs =
6595			NL "layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;" NL
6596			   "layout(std430, binding = 0) buffer Output {" NL "  uint g_buffer[22 + gl_WorkGroupSize.x];" NL "};" NL
6597			   "shared uint g_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];" NL
6598			   "uniform uint g_uniform[gl_WorkGroupSize.z + 20] = { "
6599			   "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24 };" NL "void main() {" NL
6600			   "  g_shared[gl_LocalInvocationIndex] = 1U;" NL "  groupMemoryBarrier();" NL "  barrier();" NL
6601			   "  uint sum = 0;" NL
6602			   "  for (uint i = 0; i < gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; ++i) {" NL
6603			   "    sum += g_shared[i];" NL "  }" NL "  sum += g_uniform[gl_LocalInvocationIndex];" NL
6604			   "  g_buffer[gl_LocalInvocationIndex] = sum;" NL "}";
6605		m_program = CreateComputeProgram(glsl_cs);
6606		glLinkProgram(m_program);
6607		if (!CheckProgram(m_program))
6608			return ERROR;
6609
6610		glGenBuffers(1, &m_storage_buffer);
6611		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
6612		glBufferData(GL_SHADER_STORAGE_BUFFER, 24 * sizeof(GLuint), NULL, GL_STATIC_DRAW);
6613
6614		glUseProgram(m_program);
6615		glDispatchCompute(1, 1, 1);
6616		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
6617
6618		long	error = NO_ERROR;
6619		GLuint* data;
6620		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
6621		data =
6622			static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 24, GL_MAP_READ_BIT));
6623		for (GLuint i = 0; i < 24; ++i)
6624		{
6625			if (data[i] != (i + 25))
6626			{
6627				m_context.getTestContext().getLog()
6628					<< tcu::TestLog::Message << "Data at index " << i << " is " << data[i] << " should be " << i + 25
6629					<< "." << tcu::TestLog::EndMessage;
6630				error = ERROR;
6631			}
6632		}
6633		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
6634		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
6635		return error;
6636	}
6637
6638	virtual long Cleanup()
6639	{
6640		glUseProgram(0);
6641		glDeleteProgram(m_program);
6642		glDeleteBuffers(1, &m_storage_buffer);
6643		return NO_ERROR;
6644	}
6645};
6646
6647} // anonymous namespace
6648
6649ComputeShaderTests::ComputeShaderTests(deqp::Context& context) : TestCaseGroup(context, "compute_shader", "")
6650{
6651}
6652
6653ComputeShaderTests::~ComputeShaderTests(void)
6654{
6655}
6656
6657void ComputeShaderTests::init()
6658{
6659	using namespace deqp;
6660	addChild(new TestSubcase(m_context, "simple-compute", TestSubcase::Create<SimpleCompute>));
6661	addChild(new TestSubcase(m_context, "one-work-group", TestSubcase::Create<BasicOneWorkGroup>));
6662	addChild(new TestSubcase(m_context, "resource-ubo", TestSubcase::Create<BasicResourceUBO>));
6663	addChild(new TestSubcase(m_context, "resource-texture", TestSubcase::Create<BasicResourceTexture>));
6664	addChild(new TestSubcase(m_context, "resource-image", TestSubcase::Create<BasicResourceImage>));
6665	addChild(new TestSubcase(m_context, "resource-atomic-counter", TestSubcase::Create<BasicResourceAtomicCounter>));
6666	addChild(new TestSubcase(m_context, "resource-subroutine", TestSubcase::Create<BasicResourceSubroutine>));
6667	addChild(new TestSubcase(m_context, "resource-uniform", TestSubcase::Create<BasicResourceUniform>));
6668	addChild(new TestSubcase(m_context, "built-in-variables", TestSubcase::Create<BasicBuiltinVariables>));
6669	addChild(new TestSubcase(m_context, "max", TestSubcase::Create<BasicMax>));
6670	addChild(new TestSubcase(m_context, "work-group-size", TestSubcase::Create<BasicWorkGroupSizeIsConst>));
6671	addChild(new TestSubcase(m_context, "build-monolithic", TestSubcase::Create<BasicBuildMonolithic>));
6672	addChild(new TestSubcase(m_context, "build-separable", TestSubcase::Create<BasicBuildSeparable>));
6673	addChild(new TestSubcase(m_context, "shared-simple", TestSubcase::Create<BasicSharedSimple>));
6674	addChild(new TestSubcase(m_context, "shared-struct", TestSubcase::Create<BasicSharedStruct>));
6675	addChild(new TestSubcase(m_context, "dispatch-indirect", TestSubcase::Create<BasicDispatchIndirect>));
6676	addChild(new TestSubcase(m_context, "sso-compute-pipeline", TestSubcase::Create<BasicSSOComputePipeline>));
6677	addChild(new TestSubcase(m_context, "sso-case2", TestSubcase::Create<BasicSSOCase2>));
6678	addChild(new TestSubcase(m_context, "sso-case3", TestSubcase::Create<BasicSSOCase3>));
6679	addChild(new TestSubcase(m_context, "atomic-case1", TestSubcase::Create<BasicAtomicCase1>));
6680	addChild(new TestSubcase(m_context, "atomic-case2", TestSubcase::Create<BasicAtomicCase2>));
6681	addChild(new TestSubcase(m_context, "atomic-case3", TestSubcase::Create<BasicAtomicCase3>));
6682	addChild(new TestSubcase(m_context, "copy-image", TestSubcase::Create<AdvancedCopyImage>));
6683	addChild(new TestSubcase(m_context, "pipeline-pre-vs", TestSubcase::Create<AdvancedPipelinePreVS>));
6684	addChild(
6685		new TestSubcase(m_context, "pipeline-gen-draw-commands", TestSubcase::Create<AdvancedPipelineGenDrawCommands>));
6686	addChild(new TestSubcase(m_context, "pipeline-compute-chain", TestSubcase::Create<AdvancedPipelineComputeChain>));
6687	addChild(new TestSubcase(m_context, "pipeline-post-fs", TestSubcase::Create<AdvancedPipelinePostFS>));
6688	addChild(new TestSubcase(m_context, "pipeline-post-xfb", TestSubcase::Create<AdvancedPipelinePostXFB>));
6689	addChild(new TestSubcase(m_context, "shared-indexing", TestSubcase::Create<AdvancedSharedIndexing>));
6690	addChild(new TestSubcase(m_context, "shared-max", TestSubcase::Create<AdvancedSharedMax>));
6691	addChild(new TestSubcase(m_context, "dynamic-paths", TestSubcase::Create<AdvancedDynamicPaths>));
6692	addChild(new TestSubcase(m_context, "resources-max", TestSubcase::Create<AdvancedResourcesMax>));
6693	addChild(new TestSubcase(m_context, "fp64-case1", TestSubcase::Create<AdvancedFP64Case1>));
6694	addChild(new TestSubcase(m_context, "fp64-case2", TestSubcase::Create<AdvancedFP64Case2>));
6695	addChild(new TestSubcase(m_context, "fp64-case3", TestSubcase::Create<AdvancedFP64Case3>));
6696	addChild(
6697		new TestSubcase(m_context, "conditional-dispatching", TestSubcase::Create<AdvancedConditionalDispatching>));
6698	addChild(new TestSubcase(m_context, "api-no-active-program", TestSubcase::Create<NegativeAPINoActiveProgram>));
6699	addChild(new TestSubcase(m_context, "api-work-group-count", TestSubcase::Create<NegativeAPIWorkGroupCount>));
6700	addChild(new TestSubcase(m_context, "api-indirect", TestSubcase::Create<NegativeAPIIndirect>));
6701	addChild(new TestSubcase(m_context, "api-program", TestSubcase::Create<NegativeAPIProgram>));
6702	addChild(
6703		new TestSubcase(m_context, "glsl-compile-time-errors", TestSubcase::Create<NegativeGLSLCompileTimeErrors>));
6704	addChild(new TestSubcase(m_context, "glsl-link-time-errors", TestSubcase::Create<NegativeGLSLLinkTimeErrors>));
6705}
6706} // gl4cts namespace
6707