1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL (ES) Module
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Shader execution utilities.
22 *//*--------------------------------------------------------------------*/
23
24#include "glsShaderExecUtil.hpp"
25#include "gluRenderContext.hpp"
26#include "gluDrawUtil.hpp"
27#include "gluObjectWrapper.hpp"
28#include "gluShaderProgram.hpp"
29#include "gluTextureUtil.hpp"
30#include "gluProgramInterfaceQuery.hpp"
31#include "gluPixelTransfer.hpp"
32#include "gluStrUtil.hpp"
33#include "tcuTestLog.hpp"
34#include "glwFunctions.hpp"
35#include "glwEnums.hpp"
36#include "deSTLUtil.hpp"
37#include "deStringUtil.hpp"
38#include "deUniquePtr.hpp"
39#include "deMemory.h"
40
41#include <map>
42
43namespace deqp
44{
45namespace gls
46{
47
48namespace ShaderExecUtil
49{
50
51using std::vector;
52
53static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension)
54{
55	const glw::Functions&	gl		= renderCtx.getFunctions();
56	int						numExts	= 0;
57
58	gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts);
59
60	for (int ndx = 0; ndx < numExts; ndx++)
61	{
62		const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx);
63
64		if (extension == curExt)
65			return true;
66	}
67
68	return false;
69}
70
71static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension)
72{
73	if (!isExtensionSupported(renderCtx, extension))
74		throw tcu::NotSupportedError(extension + " is not supported");
75}
76
77static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required)
78{
79	const glw::Functions&	gl					= renderCtx.getFunctions();
80	int						implementationLimit	= -1;
81	deUint32				error;
82
83	gl.getIntegerv(pname, &implementationLimit);
84	error = gl.getError();
85
86	if (error != GL_NO_ERROR)
87		throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error)));
88	if (implementationLimit < required)
89		throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit));
90}
91
92// Shader utilities
93
94static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
95{
96	const bool			usesInout	= glu::glslVersionUsesInOutQualifiers(shaderSpec.version);
97	const char*			in			= usesInout ? "in"		: "attribute";
98	const char*			out			= usesInout ? "out"		: "varying";
99	std::ostringstream	src;
100
101	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
102
103	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
104
105	if (!shaderSpec.globalDeclarations.empty())
106		src << shaderSpec.globalDeclarations << "\n";
107
108	src << in << " highp vec4 a_position;\n";
109
110	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
111		src << in << " " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
112
113	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
114	{
115		DE_ASSERT(output->varType.isBasicType());
116
117		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
118		{
119			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
120			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
121			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
122
123			src << "flat " << out << " " << glu::declare(intType, outputPrefix + output->name) << ";\n";
124		}
125		else
126			src << "flat " << out << " " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
127	}
128
129	src << "\n"
130		<< "void main (void)\n"
131		<< "{\n"
132		<< "	gl_Position = a_position;\n"
133		<< "	gl_PointSize = 1.0;\n\n";
134
135	// Declare & fetch local input variables
136	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
137		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
138
139	// Declare local output variables
140	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
141		src << "\t" << glu::declare(output->varType, output->name) << ";\n";
142
143	// Operation - indented to correct level.
144	{
145		std::istringstream	opSrc	(shaderSpec.source);
146		std::string			line;
147
148		while (std::getline(opSrc, line))
149			src << "\t" << line << "\n";
150	}
151
152	// Assignments to outputs.
153	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
154	{
155		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
156		{
157			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
158			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
159
160			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
161		}
162		else
163			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
164	}
165
166	src << "}\n";
167
168	return src.str();
169}
170
171static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
172{
173	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
174	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
175
176	std::ostringstream	src;
177
178	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
179
180	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
181		src << "#extension GL_EXT_geometry_shader : require\n";
182
183	if (!shaderSpec.globalDeclarations.empty())
184		src << shaderSpec.globalDeclarations << "\n";
185
186	src << "layout(points) in;\n"
187		<< "layout(points, max_vertices = 1) out;\n";
188
189	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
190		src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
191
192	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
193	{
194		DE_ASSERT(output->varType.isBasicType());
195
196		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
197		{
198			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
199			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
200			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
201
202			src << "flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
203		}
204		else
205			src << "flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
206	}
207
208	src << "\n"
209		<< "void main (void)\n"
210		<< "{\n"
211		<< "	gl_Position = gl_in[0].gl_Position;\n\n";
212
213	// Fetch input variables
214	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
215		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
216
217	// Declare local output variables.
218	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
219		src << "\t" << glu::declare(output->varType, output->name) << ";\n";
220
221	src << "\n";
222
223	// Operation - indented to correct level.
224	{
225		std::istringstream	opSrc	(shaderSpec.source);
226		std::string			line;
227
228		while (std::getline(opSrc, line))
229			src << "\t" << line << "\n";
230	}
231
232	// Assignments to outputs.
233	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
234	{
235		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
236		{
237			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
238			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
239
240			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
241		}
242		else
243			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
244	}
245
246	src << "	EmitVertex();\n"
247		<< "	EndPrimitive();\n"
248		<< "}\n";
249
250	return src.str();
251}
252
253static std::string generateEmptyFragmentSource (glu::GLSLVersion version)
254{
255	const bool			customOut		= glu::glslVersionUsesInOutQualifiers(version);
256	std::ostringstream	src;
257
258	src << glu::getGLSLVersionDeclaration(version) << "\n";
259
260	// \todo [2013-08-05 pyry] Do we need one dummy output?
261
262	src << "void main (void)\n{\n";
263	if (!customOut)
264		src << "	gl_FragColor = vec4(0.0);\n";
265	src << "}\n";
266
267	return src.str();
268}
269
270static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
271{
272	// flat qualifier is not present in earlier versions?
273	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
274
275	std::ostringstream src;
276
277	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"
278		<< "in highp vec4 a_position;\n";
279
280	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
281	{
282		src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
283			<< "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
284	}
285
286	src << "\nvoid main (void)\n{\n"
287		<< "	gl_Position = a_position;\n"
288		<< "	gl_PointSize = 1.0;\n";
289
290	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
291		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
292
293	src << "}\n";
294
295	return src.str();
296}
297
298static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
299{
300	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
301
302	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
303	{
304		const Symbol&				output		= shaderSpec.outputs[outNdx];
305		const int					location	= de::lookup(outLocationMap, output.name);
306		const std::string			outVarName	= outputPrefix + output.name;
307		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
308
309		TCU_CHECK_INTERNAL(output.varType.isBasicType());
310
311		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
312		{
313			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
314			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
315			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
316
317			decl.varType = uintType;
318			src << decl << ";\n";
319		}
320		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
321		{
322			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
323			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
324			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);
325
326			decl.varType = intType;
327			src << decl << ";\n";
328		}
329		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
330		{
331			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
332			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
333			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
334			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
335
336			decl.varType = uintType;
337			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
338			{
339				decl.name				= outVarName + "_" + de::toString(vecNdx);
340				decl.layout.location	= location + vecNdx;
341				src << decl << ";\n";
342			}
343		}
344		else
345			src << decl << ";\n";
346	}
347}
348
349static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix)
350{
351	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
352	{
353		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
354			src << "	o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
355		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
356		{
357			const int	numVecs		= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
358
359			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
360				if (useIntOutputs)
361					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
362				else
363					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << valuePrefix << output->name << "[" << vecNdx << "];\n";
364		}
365		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
366		{
367			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
368			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
369
370			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
371		}
372		else
373			src << "\t" << outputPrefix << output->name << " = " << valuePrefix << output->name << ";\n";
374	}
375}
376
377static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
378{
379	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
380
381	std::ostringstream	src;
382
383	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
384
385	if (!shaderSpec.globalDeclarations.empty())
386		src << shaderSpec.globalDeclarations << "\n";
387
388	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
389		src << "flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
390
391	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
392
393	src << "\nvoid main (void)\n{\n";
394
395	// Declare & fetch local input variables
396	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
397		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
398
399	// Declare output variables
400	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
401		src << "\t" << glu::declare(output->varType, output->name) << ";\n";
402
403	// Operation - indented to correct level.
404	{
405		std::istringstream	opSrc	(shaderSpec.source);
406		std::string			line;
407
408		while (std::getline(opSrc, line))
409			src << "\t" << line << "\n";
410	}
411
412	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix);
413
414	src << "}\n";
415
416	return src.str();
417}
418
419static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
420{
421	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
422
423	std::ostringstream	src;
424
425	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
426
427	if (!shaderSpec.globalDeclarations.empty())
428		src << shaderSpec.globalDeclarations << "\n";
429
430	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
431	{
432		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
433		{
434			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
435			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
436			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
437
438			src << "flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
439		}
440		else
441			src << "flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
442	}
443
444	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
445
446	src << "\nvoid main (void)\n{\n";
447
448	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
449
450	src << "}\n";
451
452	return src.str();
453}
454
455// ShaderExecutor
456
457ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
458	: m_renderCtx	(renderCtx)
459	, m_inputs		(shaderSpec.inputs)
460	, m_outputs		(shaderSpec.outputs)
461{
462}
463
//! Destructor; the body is empty.
ShaderExecutor::~ShaderExecutor (void)
{
}
467
468void ShaderExecutor::useProgram (void)
469{
470	DE_ASSERT(isOk());
471	m_renderCtx.getFunctions().useProgram(getProgram());
472}
473
474// FragmentOutExecutor
475
476struct FragmentOutputLayout
477{
478	std::vector<const Symbol*>		locationSymbols;		//! Symbols by location
479	std::map<std::string, int>		locationMap;			//! Map from symbol name to start location
480};
481
482class FragmentOutExecutor : public ShaderExecutor
483{
484public:
485								FragmentOutExecutor		(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
486								~FragmentOutExecutor	(void);
487
488	void						execute					(int numValues, const void* const* inputs, void* const* outputs);
489
490protected:
491	const FragmentOutputLayout	m_outputLayout;
492};
493
494static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
495{
496	FragmentOutputLayout	ret;
497	int						location	= 0;
498
499	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
500	{
501		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
502
503		TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
504		de::insert(ret.locationMap, it->name, location);
505		location += numLocations;
506
507		for (int ndx = 0; ndx < numLocations; ++ndx)
508			ret.locationSymbols.push_back(&*it);
509	}
510
511	return ret;
512}
513
514inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx)
515{
516	glu::ContextType type = renderCtx.getType();
517	return glu::isContextTypeGLCore(type);
518}
519
520FragmentOutExecutor::FragmentOutExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
521	: ShaderExecutor	(renderCtx, shaderSpec)
522	, m_outputLayout	(computeFragmentOutputLayout(m_outputs))
523{
524}
525
//! Destructor; the body is empty.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
529
530inline int queryInt (const glw::Functions& gl, deUint32 pname)
531{
532	int value = 0;
533	gl.getIntegerv(pname, &value);
534	return value;
535}
536
537static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
538{
539	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
540	{
541		tcu::TextureFormat::R,
542		tcu::TextureFormat::RG,
543		tcu::TextureFormat::RGBA,	// No RGB variants available.
544		tcu::TextureFormat::RGBA
545	};
546
547	const glu::DataType					basicType		= outputType.getBasicType();
548	const int							numComps		= glu::getDataTypeNumComponents(basicType);
549	tcu::TextureFormat::ChannelType		channelType;
550
551	switch (glu::getDataTypeScalarType(basicType))
552	{
553		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;												break;
554		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;													break;
555		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;													break;
556		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;	break;
557		default:
558			throw tcu::InternalError("Invalid output type");
559	}
560
561	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
562
563	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
564}
565
566void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
567{
568	const glw::Functions&			gl					= m_renderCtx.getFunctions();
569	const bool						useIntOutputs		= !hasFloatRenderTargets(m_renderCtx);
570	const int						maxRenderbufferSize	= queryInt(gl, GL_MAX_RENDERBUFFER_SIZE);
571	const int						framebufferW		= de::min(maxRenderbufferSize, numValues);
572	const int						framebufferH		= (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0);
573
574	glu::Framebuffer				framebuffer			(m_renderCtx);
575	glu::RenderbufferVector			renderbuffers		(m_renderCtx, m_outputLayout.locationSymbols.size());
576
577	vector<glu::VertexArrayBinding>	vertexArrays;
578	vector<tcu::Vec2>				positions			(numValues);
579
580	if (framebufferH > maxRenderbufferSize)
581		throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size");
582
583	// Compute positions - 1px points are used to drive fragment shading.
584	for (int valNdx = 0; valNdx < numValues; valNdx++)
585	{
586		const int		ix		= valNdx % framebufferW;
587		const int		iy		= valNdx / framebufferW;
588		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW));
589		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH));
590
591		positions[valNdx] = tcu::Vec2(fx, fy);
592	}
593
594	// Vertex inputs.
595	vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0]));
596
597	for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
598	{
599		const Symbol&		symbol		= m_inputs[inputNdx];
600		const std::string	attribName	= "a_" + symbol.name;
601		const void*			ptr			= inputs[inputNdx];
602		const glu::DataType	basicType	= symbol.varType.getBasicType();
603		const int			vecSize		= glu::getDataTypeScalarSize(basicType);
604
605		if (glu::isDataTypeFloatOrVec(basicType))
606			vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr));
607		else if (glu::isDataTypeIntOrIVec(basicType))
608			vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr));
609		else if (glu::isDataTypeUintOrUVec(basicType))
610			vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr));
611		else if (glu::isDataTypeMatrix(basicType))
612		{
613			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
614			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
615			int		stride	= numRows * numCols * (int)sizeof(float);
616
617			for (int colNdx = 0; colNdx < numCols; ++colNdx)
618				vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
619		}
620		else
621			DE_ASSERT(false);
622	}
623
624	// Construct framebuffer.
625	gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
626
627	for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
628	{
629		const Symbol&	output			= *m_outputLayout.locationSymbols[outNdx];
630		const deUint32	renderbuffer	= renderbuffers[outNdx];
631		const deUint32	format			= glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs));
632
633		gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
634		gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH);
635		gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer);
636	}
637	gl.bindRenderbuffer(GL_RENDERBUFFER, 0);
638	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object");
639	TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
640
641	{
642		vector<deUint32> drawBuffers(m_outputLayout.locationSymbols.size());
643		for (int ndx = 0; ndx < (int)m_outputLayout.locationSymbols.size(); ndx++)
644			drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx;
645		gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]);
646		GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()");
647	}
648
649	// Render
650	gl.viewport(0, 0, framebufferW, framebufferH);
651	glu::draw(m_renderCtx, this->getProgram(), (int)vertexArrays.size(), &vertexArrays[0],
652			  glu::pr::Points(numValues));
653	GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
654
655	// Read back pixels.
656	{
657		tcu::TextureLevel	tmpBuf;
658
659		// \todo [2013-08-07 pyry] Some fast-paths could be added here.
660
661		for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx)
662		{
663			const Symbol&				output			= m_outputs[outNdx];
664			const int					outSize			= output.varType.getScalarSize();
665			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
666			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
667			deUint32*					dstPtrBase		= static_cast<deUint32*>(outputs[outNdx]);
668			const tcu::TextureFormat	format			= getRenderbufferFormatForOutput(output.varType, useIntOutputs);
669			const tcu::TextureFormat	readFormat		(tcu::TextureFormat::RGBA, format.type);
670			const int					outLocation		= de::lookup(m_outputLayout.locationMap, output.name);
671
672			tmpBuf.setStorage(readFormat, framebufferW, framebufferH);
673
674			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
675			{
676				gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx);
677				glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess());
678				GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels");
679
680				if (outSize == 4 && outNumLocs == 1)
681					deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32));
682				else
683				{
684					for (int valNdx = 0; valNdx < numValues; valNdx++)
685					{
686						const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4;
687						deUint32*		dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx];
688						deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32));
689					}
690				}
691			}
692		}
693	}
694
695	// \todo [2013-08-07 pyry] Clear draw buffers & viewport?
696	gl.bindFramebuffer(GL_FRAMEBUFFER, 0);
697}
698
699// VertexShaderExecutor
700
701class VertexShaderExecutor : public FragmentOutExecutor
702{
703public:
704								VertexShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
705								~VertexShaderExecutor	(void);
706
707	bool						isOk					(void) const				{ return m_program.isOk();			}
708	void						log						(tcu::TestLog& dst) const	{ dst << m_program;					}
709	deUint32					getProgram				(void) const				{ return m_program.getProgram();	}
710
711protected:
712	const glu::ShaderProgram	m_program;
713};
714
715VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
716	: FragmentOutExecutor	(renderCtx, shaderSpec)
717	, m_program				(renderCtx,
718							 glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_"))
719												   << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
720{
721}
722
//! Destructor; the body is empty.
VertexShaderExecutor::~VertexShaderExecutor (void)
{
}
726
727// GeometryShaderExecutor
728
729class GeometryShaderExecutor : public FragmentOutExecutor
730{
731public:
732	static GeometryShaderExecutor*	create					(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
733
734									~GeometryShaderExecutor	(void);
735
736	bool							isOk					(void) const				{ return m_program.isOk();			}
737	void							log						(tcu::TestLog& dst) const	{ dst << m_program;					}
738	deUint32						getProgram				(void) const				{ return m_program.getProgram();	}
739
740protected:
741	const glu::ShaderProgram		m_program;
742
743private:
744									GeometryShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
745};
746
747GeometryShaderExecutor* GeometryShaderExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
748{
749	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
750		checkExtension(renderCtx, "GL_EXT_geometry_shader");
751
752	return new GeometryShaderExecutor(renderCtx, shaderSpec);
753}
754
755GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
756	: FragmentOutExecutor	(renderCtx, shaderSpec)
757	, m_program				(renderCtx,
758							 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
759												   << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_"))
760												   << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "geom_out_", "o_")))
761{
762}
763
//! Destructor; the body is empty.
GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}
767
768// FragmentShaderExecutor
769
770class FragmentShaderExecutor : public FragmentOutExecutor
771{
772public:
773								FragmentShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
774								~FragmentShaderExecutor	(void);
775
776	bool						isOk					(void) const				{ return m_program.isOk();			}
777	void						log						(tcu::TestLog& dst) const	{ dst << m_program;					}
778	deUint32					getProgram				(void) const				{ return m_program.getProgram();	}
779
780protected:
781	const glu::ShaderProgram	m_program;
782};
783
784FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
785	: FragmentOutExecutor	(renderCtx, shaderSpec)
786	, m_program				(renderCtx,
787							 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_"))
788												   << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outputLayout.locationMap, "vtx_out_", "o_")))
789{
790}
791
//! Destructor; the body is empty.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
795
796// Shared utilities for compute and tess executors
797
798static deUint32 getVecStd430ByteAlignment (glu::DataType type)
799{
800	switch (glu::getDataTypeScalarSize(type))
801	{
802		case 1:		return 4u;
803		case 2:		return 8u;
804		case 3:		return 16u;
805		case 4:		return 16u;
806		default:
807			DE_ASSERT(false);
808			return 0u;
809	}
810}
811
812class BufferIoExecutor : public ShaderExecutor
813{
814public:
815						BufferIoExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
816						~BufferIoExecutor	(void);
817
818	bool				isOk				(void) const				{ return m_program.isOk();			}
819	void				log					(tcu::TestLog& dst) const	{ dst << m_program;					}
820	deUint32			getProgram			(void) const				{ return m_program.getProgram();	}
821
822protected:
823	enum
824	{
825		INPUT_BUFFER_BINDING	= 0,
826		OUTPUT_BUFFER_BINDING	= 1,
827	};
828
829	void				initBuffers			(int numValues);
830	deUint32			getInputBuffer		(void) const		{ return *m_inputBuffer;					}
831	deUint32			getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
832	deUint32			getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
833	deUint32			getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}
834
835	void				uploadInputBuffer	(const void* const* inputPtrs, int numValues);
836	void				readOutputBuffer	(void* const* outputPtrs, int numValues);
837
838	static void			declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
839	static void			generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
840
841	glu::ShaderProgram	m_program;
842
843private:
844	struct VarLayout
845	{
846		deUint32		offset;
847		deUint32		stride;
848		deUint32		matrixStride;
849
850		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
851	};
852
853	void				resizeInputBuffer	(int newSize);
854	void				resizeOutputBuffer	(int newSize);
855
856	static void			computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
857	static deUint32		getLayoutStride		(const vector<VarLayout>& layout);
858
859	static void			copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
860	static void			copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
861
862	glu::Buffer			m_inputBuffer;
863	glu::Buffer			m_outputBuffer;
864
865	vector<VarLayout>	m_inputLayout;
866	vector<VarLayout>	m_outputLayout;
867};
868
869BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
870	: ShaderExecutor	(renderCtx, shaderSpec)
871	, m_program			(renderCtx, sources)
872	, m_inputBuffer		(renderCtx)
873	, m_outputBuffer	(renderCtx)
874{
875	computeVarLayout(m_inputs, &m_inputLayout);
876	computeVarLayout(m_outputs, &m_outputLayout);
877}
878
879BufferIoExecutor::~BufferIoExecutor (void)
880{
881}
882
883void BufferIoExecutor::resizeInputBuffer (int newSize)
884{
885	const glw::Functions& gl = m_renderCtx.getFunctions();
886	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer);
887	gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
888	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer");
889}
890
891void BufferIoExecutor::resizeOutputBuffer (int newSize)
892{
893	const glw::Functions& gl = m_renderCtx.getFunctions();
894	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer);
895	gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
896	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer");
897}
898
899void BufferIoExecutor::initBuffers (int numValues)
900{
901	const deUint32		inputStride			= getLayoutStride(m_inputLayout);
902	const deUint32		outputStride		= getLayoutStride(m_outputLayout);
903	const int			inputBufferSize		= numValues * inputStride;
904	const int			outputBufferSize	= numValues * outputStride;
905
906	resizeInputBuffer(inputBufferSize);
907	resizeOutputBuffer(outputBufferSize);
908}
909
910void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
911{
912	deUint32	maxAlignment	= 0;
913	deUint32	curOffset		= 0;
914
915	DE_ASSERT(layout->empty());
916	layout->resize(symbols.size());
917
918	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
919	{
920		const Symbol&		symbol		= symbols[varNdx];
921		const glu::DataType	basicType	= symbol.varType.getBasicType();
922		VarLayout&			layoutEntry	= (*layout)[varNdx];
923
924		if (glu::isDataTypeScalarOrVector(basicType))
925		{
926			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
927			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType)*(int)sizeof(deUint32);
928
929			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
930			maxAlignment	= de::max(maxAlignment, alignment);
931
932			layoutEntry.offset			= curOffset;
933			layoutEntry.matrixStride	= 0;
934
935			curOffset += size;
936		}
937		else if (glu::isDataTypeMatrix(basicType))
938		{
939			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
940			const glu::DataType		vecType			= glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
941			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);
942
943			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
944			maxAlignment	= de::max(maxAlignment, vecAlignment);
945
946			layoutEntry.offset			= curOffset;
947			layoutEntry.matrixStride	= vecAlignment;
948
949			curOffset += vecAlignment*numVecs;
950		}
951		else
952			DE_ASSERT(false);
953	}
954
955	{
956		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);
957
958		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
959			varIter->stride = totalSize;
960	}
961}
962
963inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
964{
965	return layout.empty() ? 0 : layout[0].stride;
966}
967
968void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
969{
970	if (varType.isBasicType())
971	{
972		const glu::DataType		basicType		= varType.getBasicType();
973		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
974		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
975		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
976		const int				numComps		= scalarSize / numVecs;
977
978		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
979		{
980			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
981			{
982				const int		srcOffset		= (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
983				const int		dstOffset		= layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
984				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
985				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
986
987				deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
988			}
989		}
990	}
991	else
992		throw tcu::InternalError("Unsupported type");
993}
994
995void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
996{
997	if (varType.isBasicType())
998	{
999		const glu::DataType		basicType		= varType.getBasicType();
1000		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1001		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1002		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1003		const int				numComps		= scalarSize / numVecs;
1004
1005		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1006		{
1007			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1008			{
1009				const int		srcOffset		= layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1010				const int		dstOffset		= (int)sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1011				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1012				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1013
1014				deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1015			}
1016		}
1017	}
1018	else
1019		throw tcu::InternalError("Unsupported type");
1020}
1021
1022void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1023{
1024	const glw::Functions&	gl				= m_renderCtx.getFunctions();
1025	const deUint32			buffer			= *m_inputBuffer;
1026	const deUint32			inputStride		= getLayoutStride(m_inputLayout);
1027	const int				inputBufferSize	= inputStride*numValues;
1028
1029	if (inputBufferSize == 0)
1030		return; // No inputs
1031
1032	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1033	void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT);
1034	GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1035	TCU_CHECK(mapPtr);
1036
1037	try
1038	{
1039		DE_ASSERT(m_inputs.size() == m_inputLayout.size());
1040		for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx)
1041		{
1042			const glu::VarType&		varType		= m_inputs[inputNdx].varType;
1043			const VarLayout&		layout		= m_inputLayout[inputNdx];
1044
1045			copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr);
1046		}
1047	}
1048	catch (...)
1049	{
1050		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1051		throw;
1052	}
1053
1054	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1055	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1056}
1057
1058void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1059{
1060	const glw::Functions&	gl					= m_renderCtx.getFunctions();
1061	const deUint32			buffer				= *m_outputBuffer;
1062	const deUint32			outputStride		= getLayoutStride(m_outputLayout);
1063	const int				outputBufferSize	= numValues*outputStride;
1064
1065	DE_ASSERT(outputBufferSize > 0); // At least some outputs are required.
1066
1067	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1068	void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT);
1069	GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1070	TCU_CHECK(mapPtr);
1071
1072	try
1073	{
1074		DE_ASSERT(m_outputs.size() == m_outputLayout.size());
1075		for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx)
1076		{
1077			const glu::VarType&		varType		= m_outputs[outputNdx].varType;
1078			const VarLayout&		layout		= m_outputLayout[outputNdx];
1079
1080			copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]);
1081		}
1082	}
1083	catch (...)
1084	{
1085		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1086		throw;
1087	}
1088
1089	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1090	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1091}
1092
1093void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1094{
1095	// Input struct
1096	if (!spec.inputs.empty())
1097	{
1098		glu::StructType inputStruct("Inputs");
1099		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1100			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1101		src << glu::declare(&inputStruct) << ";\n";
1102	}
1103
1104	// Output struct
1105	{
1106		glu::StructType outputStruct("Outputs");
1107		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1108			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1109		src << glu::declare(&outputStruct) << ";\n";
1110	}
1111
1112	src << "\n";
1113
1114	if (!spec.inputs.empty())
1115	{
1116		src	<< "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1117			<< "{\n"
1118			<< "	Inputs inputs[];\n"
1119			<< "};\n";
1120	}
1121
1122	src	<< "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1123		<< "{\n"
1124		<< "	Outputs outputs[];\n"
1125		<< "};\n"
1126		<< "\n";
1127}
1128
1129void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1130{
1131	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1132		src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n";
1133
1134	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1135		src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n";
1136
1137	src << "\n";
1138
1139	{
1140		std::istringstream	opSrc	(spec.source);
1141		std::string			line;
1142
1143		while (std::getline(opSrc, line))
1144			src << "\t" << line << "\n";
1145	}
1146
1147	src << "\n";
1148	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1149		src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1150}
1151
1152// ComputeShaderExecutor
1153
1154class ComputeShaderExecutor : public BufferIoExecutor
1155{
1156public:
1157						ComputeShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1158						~ComputeShaderExecutor	(void);
1159
1160	void				execute					(int numValues, const void* const* inputs, void* const* outputs);
1161
1162protected:
1163	static std::string	generateComputeShader	(const ShaderSpec& spec);
1164
1165	tcu::IVec3			m_maxWorkSize;
1166};
1167
1168std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
1169{
1170	std::ostringstream src;
1171
1172	src << glu::getGLSLVersionDeclaration(spec.version) << "\n";
1173
1174	if (!spec.globalDeclarations.empty())
1175		src << spec.globalDeclarations << "\n";
1176
1177	src << "layout(local_size_x = 1) in;\n"
1178		<< "\n";
1179
1180	declareBufferBlocks(src, spec);
1181
1182	src << "void main (void)\n"
1183		<< "{\n"
1184		<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
1185		<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
1186
1187	generateExecBufferIo(src, spec, "invocationNdx");
1188
1189	src << "}\n";
1190
1191	return src.str();
1192}
1193
1194ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1195	: BufferIoExecutor	(renderCtx, shaderSpec,
1196						 glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec)))
1197{
1198	m_maxWorkSize	= tcu::IVec3(128,128,64); // Minimum in 3plus
1199}
1200
1201ComputeShaderExecutor::~ComputeShaderExecutor (void)
1202{
1203}
1204
1205void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1206{
1207	const glw::Functions&	gl						= m_renderCtx.getFunctions();
1208	const int				maxValuesPerInvocation	= m_maxWorkSize[0];
1209	const deUint32			inputStride				= getInputStride();
1210	const deUint32			outputStride			= getOutputStride();
1211
1212	initBuffers(numValues);
1213
1214	// Setup input buffer & copy data
1215	uploadInputBuffer(inputs, numValues);
1216
1217	// Perform compute invocations
1218	{
1219		int curOffset = 0;
1220		while (curOffset < numValues)
1221		{
1222			const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
1223
1224			if (inputStride > 0)
1225				gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride);
1226
1227			gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride);
1228			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)");
1229
1230			gl.dispatchCompute(numToExec, 1, 1);
1231			GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1232
1233			curOffset += numToExec;
1234		}
1235	}
1236
1237	// Read back data
1238	readOutputBuffer(outputs, numValues);
1239}
1240
1241// Tessellation utils
1242
1243static std::string generateVertexShaderForTess (glu::GLSLVersion version)
1244{
1245	std::ostringstream	src;
1246
1247	src << glu::getGLSLVersionDeclaration(version) << "\n";
1248
1249	src << "void main (void)\n{\n"
1250		<< "	gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n"
1251		<< "}\n";
1252
1253	return src.str();
1254}
1255
1256void checkTessSupport (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, glu::ShaderType stage)
1257{
1258	const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2
1259
1260	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1261		checkExtension(renderCtx, "GL_EXT_tessellation_shader");
1262
1263	if (stage == glu::SHADERTYPE_TESSELLATION_CONTROL)
1264		checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired);
1265	else if (stage == glu::SHADERTYPE_TESSELLATION_EVALUATION)
1266		checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired);
1267	else
1268		DE_ASSERT(false);
1269}
1270
1271// TessControlExecutor
1272
1273class TessControlExecutor : public BufferIoExecutor
1274{
1275public:
1276	static TessControlExecutor*	create						(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1277
1278								~TessControlExecutor		(void);
1279
1280	void						execute						(int numValues, const void* const* inputs, void* const* outputs);
1281
1282
1283protected:
1284	static std::string			generateTessControlShader	(const ShaderSpec& shaderSpec);
1285
1286private:
1287								TessControlExecutor			(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1288};
1289
1290TessControlExecutor* TessControlExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1291{
1292	checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_CONTROL);
1293
1294	return new TessControlExecutor(renderCtx, shaderSpec);
1295}
1296
1297std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
1298{
1299	std::ostringstream src;
1300
1301	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1302
1303	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1304		src << "#extension GL_EXT_tessellation_shader : require\n";
1305
1306	if (!shaderSpec.globalDeclarations.empty())
1307		src << shaderSpec.globalDeclarations << "\n";
1308
1309	src << "\nlayout(vertices = 1) out;\n\n";
1310
1311	declareBufferBlocks(src, shaderSpec);
1312
1313	src << "void main (void)\n{\n";
1314
1315	for (int ndx = 0; ndx < 2; ndx++)
1316		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1317
1318	for (int ndx = 0; ndx < 4; ndx++)
1319		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1320
1321	src << "\n"
1322		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
1323
1324	generateExecBufferIo(src, shaderSpec, "invocationId");
1325
1326	src << "}\n";
1327
1328	return src.str();
1329}
1330
1331static std::string generateEmptyTessEvalShader (glu::GLSLVersion version)
1332{
1333	std::ostringstream src;
1334
1335	src << glu::getGLSLVersionDeclaration(version) << "\n";
1336
1337	if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1338		src << "#extension GL_EXT_tessellation_shader : require\n\n";
1339
1340	src << "layout(triangles, ccw) in;\n";
1341
1342	src << "\nvoid main (void)\n{\n"
1343		<< "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
1344		<< "}\n";
1345
1346	return src.str();
1347}
1348
1349TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1350	: BufferIoExecutor	(renderCtx, shaderSpec, glu::ProgramSources()
1351							<< glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1352							<< glu::TessellationControlSource(generateTessControlShader(shaderSpec))
1353							<< glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version))
1354							<< glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1355{
1356}
1357
1358TessControlExecutor::~TessControlExecutor (void)
1359{
1360}
1361
1362void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1363{
1364	const glw::Functions&	gl	= m_renderCtx.getFunctions();
1365
1366	initBuffers(numValues);
1367
1368	// Setup input buffer & copy data
1369	uploadInputBuffer(inputs, numValues);
1370
1371	if (!m_inputs.empty())
1372		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1373
1374	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1375
1376	// Render patches
1377	gl.patchParameteri(GL_PATCH_VERTICES, 3);
1378	gl.drawArrays(GL_PATCHES, 0, 3*numValues);
1379
1380	// Read back data
1381	readOutputBuffer(outputs, numValues);
1382}
1383
1384// TessEvaluationExecutor
1385
1386class TessEvaluationExecutor : public BufferIoExecutor
1387{
1388public:
1389	static TessEvaluationExecutor*	create					(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1390
1391									~TessEvaluationExecutor	(void);
1392
1393	void							execute					(int numValues, const void* const* inputs, void* const* outputs);
1394
1395
1396protected:
1397	static std::string				generateTessEvalShader	(const ShaderSpec& shaderSpec);
1398
1399private:
1400									TessEvaluationExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1401};
1402
1403TessEvaluationExecutor* TessEvaluationExecutor::create (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1404{
1405	checkTessSupport(renderCtx, shaderSpec, glu::SHADERTYPE_TESSELLATION_EVALUATION);
1406
1407	return new TessEvaluationExecutor(renderCtx, shaderSpec);
1408}
1409
1410static std::string generatePassthroughTessControlShader (glu::GLSLVersion version)
1411{
1412	std::ostringstream src;
1413
1414	src << glu::getGLSLVersionDeclaration(version) << "\n";
1415
1416	if (glu::glslVersionIsES(version) && version <= glu::GLSL_VERSION_310_ES)
1417		src << "#extension GL_EXT_tessellation_shader : require\n\n";
1418
1419	src << "layout(vertices = 1) out;\n\n";
1420
1421	src << "void main (void)\n{\n";
1422
1423	for (int ndx = 0; ndx < 2; ndx++)
1424		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1425
1426	for (int ndx = 0; ndx < 4; ndx++)
1427		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1428
1429	src << "}\n";
1430
1431	return src.str();
1432}
1433
1434std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
1435{
1436	std::ostringstream src;
1437
1438	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1439
1440	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
1441		src << "#extension GL_EXT_tessellation_shader : require\n";
1442
1443	if (!shaderSpec.globalDeclarations.empty())
1444		src << shaderSpec.globalDeclarations << "\n";
1445
1446	src << "\n";
1447
1448	src << "layout(isolines, equal_spacing) in;\n\n";
1449
1450	declareBufferBlocks(src, shaderSpec);
1451
1452	src << "void main (void)\n{\n"
1453		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
1454		<< "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
1455
1456	generateExecBufferIo(src, shaderSpec, "invocationId");
1457
1458	src	<< "}\n";
1459
1460	return src.str();
1461}
1462
1463TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1464	: BufferIoExecutor	(renderCtx, shaderSpec, glu::ProgramSources()
1465							<< glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1466							<< glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version))
1467							<< glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec))
1468							<< glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1469{
1470}
1471
1472TessEvaluationExecutor::~TessEvaluationExecutor (void)
1473{
1474}
1475
1476void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1477{
1478	const glw::Functions&	gl				= m_renderCtx.getFunctions();
1479	const int				alignedValues	= deAlign32(numValues, 2);
1480
1481	// Initialize buffers with aligned value count to make room for padding
1482	initBuffers(alignedValues);
1483
1484	// Setup input buffer & copy data
1485	uploadInputBuffer(inputs, numValues);
1486
1487	// \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example?
1488
1489	if (!m_inputs.empty())
1490		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1491
1492	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1493
1494	// Render patches
1495	gl.patchParameteri(GL_PATCH_VERTICES, 2);
1496	gl.drawArrays(GL_PATCHES, 0, alignedValues);
1497
1498	// Read back data
1499	readOutputBuffer(outputs, numValues);
1500}
1501
1502// Utilities
1503
1504ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec)
1505{
1506	switch (shaderType)
1507	{
1508		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor			(renderCtx, shaderSpec);
1509		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return TessControlExecutor::create		(renderCtx, shaderSpec);
1510		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return TessEvaluationExecutor::create	(renderCtx, shaderSpec);
1511		case glu::SHADERTYPE_GEOMETRY:					return GeometryShaderExecutor::create	(renderCtx, shaderSpec);
1512		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor		(renderCtx, shaderSpec);
1513		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor		(renderCtx, shaderSpec);
1514		default:
1515			throw tcu::InternalError("Unsupported shader type");
1516	}
1517}
1518
1519} // ShaderExecUtil
1520} // gls
1521} // deqp
1522