glsShaderExecUtil.cpp revision 582ae6e4b619cd8255b95447a3475070fff1b6b4
1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL (ES) Module
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Shader execution utilities.
22 *//*--------------------------------------------------------------------*/
23
24#include "glsShaderExecUtil.hpp"
25#include "gluRenderContext.hpp"
26#include "gluDrawUtil.hpp"
27#include "gluObjectWrapper.hpp"
28#include "gluShaderProgram.hpp"
29#include "gluTextureUtil.hpp"
30#include "gluProgramInterfaceQuery.hpp"
31#include "gluPixelTransfer.hpp"
32#include "gluStrUtil.hpp"
33#include "tcuTestLog.hpp"
34#include "glwFunctions.hpp"
35#include "glwEnums.hpp"
36#include "deSTLUtil.hpp"
37#include "deStringUtil.hpp"
38#include "deUniquePtr.hpp"
39#include "deMemory.h"
40
41#include <map>
42
43namespace deqp
44{
45namespace gls
46{
47
48namespace ShaderExecUtil
49{
50
51using std::vector;
52
53static bool isExtensionSupported (const glu::RenderContext& renderCtx, const std::string& extension)
54{
55	const glw::Functions&	gl		= renderCtx.getFunctions();
56	int						numExts	= 0;
57
58	gl.getIntegerv(GL_NUM_EXTENSIONS, &numExts);
59
60	for (int ndx = 0; ndx < numExts; ndx++)
61	{
62		const char* curExt = (const char*)gl.getStringi(GL_EXTENSIONS, ndx);
63
64		if (extension == curExt)
65			return true;
66	}
67
68	return false;
69}
70
71static void checkExtension (const glu::RenderContext& renderCtx, const std::string& extension)
72{
73	if (!isExtensionSupported(renderCtx, extension))
74		throw tcu::NotSupportedError(extension + " is not supported");
75}
76
77static void checkLimit (const glu::RenderContext& renderCtx, deUint32 pname, int required)
78{
79	const glw::Functions&	gl					= renderCtx.getFunctions();
80	int						implementationLimit	= -1;
81	deUint32				error;
82
83	gl.getIntegerv(pname, &implementationLimit);
84	error = gl.getError();
85
86	if (error != GL_NO_ERROR)
87		throw tcu::TestError("Failed to query " + de::toString(glu::getGettableStateStr(pname)) + " - got " + de::toString(glu::getErrorStr(error)));
88	if (implementationLimit < required)
89		throw tcu::NotSupportedError("Test requires " + de::toString(glu::getGettableStateStr(pname)) + " >= " + de::toString(required) + ", got " + de::toString(implementationLimit));
90}
91
92// Shader utilities
93
94static std::string generateVertexShader (const ShaderSpec& shaderSpec)
95{
96	const bool			usesInout	= glu::glslVersionUsesInOutQualifiers(shaderSpec.version);
97	const char*			in			= usesInout ? "in"		: "attribute";
98	const char*			out			= usesInout ? "out"		: "varying";
99	std::ostringstream	src;
100
101	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
102
103	if (!shaderSpec.globalDeclarations.empty())
104		src << shaderSpec.globalDeclarations << "\n";
105
106	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
107		src << in << " " << glu::declare(input->varType, input->name) << ";\n";
108
109	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
110	{
111		DE_ASSERT(output->varType.isBasicType());
112
113		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
114		{
115			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
116			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
117			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
118
119			src << "flat " << out << " " << glu::declare(intType, "o_" + output->name) << ";\n";
120		}
121		else
122			src << "flat " << out << " " << glu::declare(output->varType, output->name) << ";\n";
123	}
124
125	src << "\n"
126		<< "void main (void)\n"
127		<< "{\n"
128		<< "	gl_Position = vec4(0.0);\n"
129		<< "	gl_PointSize = 1.0;\n\n";
130
131	// Declare necessary output variables (bools).
132	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
133	{
134		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
135			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
136	}
137
138	// Operation - indented to correct level.
139	{
140		std::istringstream	opSrc	(shaderSpec.source);
141		std::string			line;
142
143		while (std::getline(opSrc, line))
144			src << "\t" << line << "\n";
145	}
146
147	// Assignments to outputs.
148	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
149	{
150		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
151		{
152			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
153			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
154
155			src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
156		}
157	}
158
159	src << "}\n";
160
161	return src.str();
162}
163
164static std::string generateGeometryShader (const ShaderSpec& shaderSpec)
165{
166	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
167
168	std::ostringstream	src;
169
170	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
171
172	if (glu::glslVersionIsES(shaderSpec.version) && shaderSpec.version <= glu::GLSL_VERSION_310_ES)
173		src << "#extension GL_EXT_geometry_shader : require\n";
174
175	if (!shaderSpec.globalDeclarations.empty())
176		src << shaderSpec.globalDeclarations << "\n";
177
178	src << "layout(points) in;\n"
179		<< "layout(points, max_vertices = 1) out;\n";
180
181	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
182		src << "flat in " << glu::declare(input->varType, "geom_" + input->name) << "[];\n";
183
184	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
185	{
186		DE_ASSERT(output->varType.isBasicType());
187
188		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
189		{
190			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
191			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
192			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
193
194			src << "flat out " << glu::declare(intType, "o_" + output->name) << ";\n";
195		}
196		else
197			src << "flat out " << glu::declare(output->varType, output->name) << ";\n";
198	}
199
200	src << "\n"
201		<< "void main (void)\n"
202		<< "{\n"
203		<< "	gl_Position = gl_in[0].gl_Position;\n\n";
204
205	// Fetch input variables
206	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
207		src << "\t" << glu::declare(input->varType, input->name) << " = geom_" << input->name << "[0];\n";
208
209	// Declare necessary output variables (bools).
210	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
211	{
212		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
213			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
214	}
215
216	src << "\n";
217
218	// Operation - indented to correct level.
219	{
220		std::istringstream	opSrc	(shaderSpec.source);
221		std::string			line;
222
223		while (std::getline(opSrc, line))
224			src << "\t" << line << "\n";
225	}
226
227	// Assignments to outputs.
228	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
229	{
230		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
231		{
232			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
233			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
234
235			src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
236		}
237	}
238
239	src << "	EmitVertex();\n"
240		<< "	EndPrimitive();\n"
241		<< "}\n";
242
243	return src.str();
244}
245
246static std::string generateEmptyFragmentSource (glu::GLSLVersion version)
247{
248	const bool			customOut		= glu::glslVersionUsesInOutQualifiers(version);
249	std::ostringstream	src;
250
251	src << glu::getGLSLVersionDeclaration(version) << "\n";
252
253	// \todo [2013-08-05 pyry] Do we need one dummy output?
254
255	src << "void main (void)\n{\n";
256	if (!customOut)
257		src << "	gl_FragColor = vec4(0.0);\n";
258	src << "}\n";
259
260	return src.str();
261}
262
263static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
264{
265	// flat qualifier is not present in earlier versions?
266	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
267
268	std::ostringstream src;
269
270	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n"
271		<< "in highp vec4 a_position;\n";
272
273	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
274	{
275		src << "in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
276			<< "flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
277	}
278
279	src << "\nvoid main (void)\n{\n"
280		<< "	gl_Position = a_position;\n"
281		<< "	gl_PointSize = 1.0;\n";
282
283	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
284		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
285
286	src << "}\n";
287
288	return src.str();
289}
290
291static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap)
292{
293	DE_ASSERT(glu::glslVersionUsesInOutQualifiers(shaderSpec.version));
294
295	std::ostringstream	src;
296	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
297
298	if (!shaderSpec.globalDeclarations.empty())
299		src << shaderSpec.globalDeclarations << "\n";
300
301	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
302		src << "flat in " << glu::declare(input->varType, input->name) << ";\n";
303
304	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
305	{
306		const Symbol&				output		= shaderSpec.outputs[outNdx];
307		const int					location	= de::lookup(outLocationMap, output.name);
308		const std::string			outVarName	= "o_" + output.name;
309		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
310
311		TCU_CHECK_INTERNAL(output.varType.isBasicType());
312
313		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
314		{
315			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
316			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
317			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
318
319			decl.varType = uintType;
320			src << decl << ";\n";
321		}
322		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
323		{
324			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
325			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
326			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);
327
328			decl.varType = intType;
329			src << decl << ";\n";
330		}
331		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
332		{
333			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
334			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
335			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
336			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
337
338			decl.varType = uintType;
339			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
340			{
341				decl.name				= outVarName + "_" + de::toString(vecNdx);
342				decl.layout.location	= location + vecNdx;
343				src << decl << ";\n";
344			}
345		}
346		else
347			src << glu::VariableDeclaration(output.varType, output.name, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, location) << ";\n";
348	}
349
350	src << "\nvoid main (void)\n{\n";
351
352	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
353	{
354		if ((useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType())) ||
355			glu::isDataTypeBoolOrBVec(output->varType.getBasicType()) ||
356			glu::isDataTypeMatrix(output->varType.getBasicType()))
357			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
358	}
359
360	// Operation - indented to correct level.
361	{
362		std::istringstream	opSrc	(shaderSpec.source);
363		std::string			line;
364
365		while (std::getline(opSrc, line))
366			src << "\t" << line << "\n";
367	}
368
369	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
370	{
371		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372			src << "	o_" << output->name << " = floatBitsToUint(" << output->name << ");\n";
373		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374		{
375			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376
377			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378				if (useIntOutputs)
379					src << "\to_" << output->name << "_" << vecNdx << " = floatBitsToUint(" << output->name << "[" << vecNdx << "]);\n";
380				else
381					src << "\to_" << output->name << "_" << vecNdx << " = " << output->name << "[" << vecNdx << "];\n";
382		}
383		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384		{
385			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
386			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387
388			src << "\to_" << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
389		}
390	}
391
392	src << "}\n";
393
394	return src.str();
395}
396
397// ShaderExecutor
398
399ShaderExecutor::ShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
400	: m_renderCtx	(renderCtx)
401	, m_inputs		(shaderSpec.inputs)
402	, m_outputs		(shaderSpec.outputs)
403{
404}
405
406ShaderExecutor::~ShaderExecutor (void)
407{
408}
409
410void ShaderExecutor::useProgram (void)
411{
412	DE_ASSERT(isOk());
413	m_renderCtx.getFunctions().useProgram(getProgram());
414}
415
416// VertexProcessorExecutor (base class for vertex and geometry executors)
417
418class VertexProcessorExecutor : public ShaderExecutor
419{
420public:
421								VertexProcessorExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
422								~VertexProcessorExecutor(void);
423
424	bool						isOk					(void) const				{ return m_program.isOk();			}
425	void						log						(tcu::TestLog& dst) const	{ dst << m_program;					}
426	deUint32					getProgram				(void) const				{ return m_program.getProgram();	}
427
428	void						execute					(int numValues, const void* const* inputs, void* const* outputs);
429
430protected:
431	glu::ShaderProgram			m_program;
432};
433
434template<typename Iterator>
435struct SymbolNameIterator
436{
437	Iterator symbolIter;
438
439	SymbolNameIterator (Iterator symbolIter_) : symbolIter(symbolIter_) {}
440
441	inline SymbolNameIterator&	operator++	(void)								{ ++symbolIter; return *this;				}
442
443	inline bool					operator==	(const SymbolNameIterator& other)	{ return symbolIter == other.symbolIter;	}
444	inline bool					operator!=	(const SymbolNameIterator& other)	{ return symbolIter != other.symbolIter;	}
445
446	inline std::string operator* (void) const
447	{
448		if (glu::isDataTypeBoolOrBVec(symbolIter->varType.getBasicType()))
449			return "o_" + symbolIter->name;
450		else
451			return symbolIter->name;
452	}
453};
454
455template<typename Iterator>
456inline glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> > getTFVaryings (Iterator begin, Iterator end)
457{
458	return glu::TransformFeedbackVaryings<SymbolNameIterator<Iterator> >(SymbolNameIterator<Iterator>(begin), SymbolNameIterator<Iterator>(end));
459}
460
461VertexProcessorExecutor::VertexProcessorExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
462	: ShaderExecutor	(renderCtx, shaderSpec)
463	, m_program			(renderCtx,
464						 glu::ProgramSources(sources) << getTFVaryings(shaderSpec.outputs.begin(), shaderSpec.outputs.end())
465													  << glu::TransformFeedbackMode(GL_INTERLEAVED_ATTRIBS))
466{
467}
468
469VertexProcessorExecutor::~VertexProcessorExecutor (void)
470{
471}
472
// Sums varType.getScalarSize() over all symbols in [begin, end).
template<typename Iterator>
static int computeTotalScalarSize (Iterator begin, Iterator end)
{
	int			total	= 0;
	Iterator	it		= begin;

	while (it != end)
	{
		total += it->varType.getScalarSize();
		++it;
	}

	return total;
}
481
482void VertexProcessorExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
483{
484	const glw::Functions&					gl					= m_renderCtx.getFunctions();
485	const bool								useTFObject			= isContextTypeES(m_renderCtx.getType()) || (isContextTypeGLCore(m_renderCtx.getType()) && m_renderCtx.getType().getMajorVersion() >= 4);
486	vector<glu::VertexArrayBinding>			vertexArrays;
487	de::UniquePtr<glu::TransformFeedback>	transformFeedback	(useTFObject ? new glu::TransformFeedback(m_renderCtx) : DE_NULL);
488	glu::Buffer								outputBuffer		(m_renderCtx);
489	const int								outputBufferStride	= computeTotalScalarSize(m_outputs.begin(), m_outputs.end())*sizeof(deUint32);
490
491	// Setup inputs.
492	for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
493	{
494		const Symbol&		symbol		= m_inputs[inputNdx];
495		const void*			ptr			= inputs[inputNdx];
496		const glu::DataType	basicType	= symbol.varType.getBasicType();
497		const int			vecSize		= glu::getDataTypeScalarSize(basicType);
498
499		if (glu::isDataTypeFloatOrVec(basicType))
500			vertexArrays.push_back(glu::va::Float(symbol.name, vecSize, numValues, 0, (const float*)ptr));
501		else if (glu::isDataTypeIntOrIVec(basicType))
502			vertexArrays.push_back(glu::va::Int32(symbol.name, vecSize, numValues, 0, (const deInt32*)ptr));
503		else if (glu::isDataTypeUintOrUVec(basicType))
504			vertexArrays.push_back(glu::va::Uint32(symbol.name, vecSize, numValues, 0, (const deUint32*)ptr));
505		else if (glu::isDataTypeMatrix(basicType))
506		{
507			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
508			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
509			int		stride	= numRows * numCols * sizeof(float);
510
511			for (int colNdx = 0; colNdx < numCols; ++colNdx)
512				vertexArrays.push_back(glu::va::Float(symbol.name, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
513		}
514		else
515			DE_ASSERT(false);
516	}
517
518	// Setup TF outputs.
519	if (useTFObject)
520		gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, **transformFeedback);
521	gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, *outputBuffer);
522	gl.bufferData(GL_TRANSFORM_FEEDBACK_BUFFER, outputBufferStride*numValues, DE_NULL, GL_STREAM_READ);
523	gl.bindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, *outputBuffer);
524	GLU_EXPECT_NO_ERROR(gl.getError(), "Error in TF setup");
525
526	// Draw with rasterization disabled.
527	gl.beginTransformFeedback(GL_POINTS);
528	gl.enable(GL_RASTERIZER_DISCARD);
529	glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), vertexArrays.empty() ? DE_NULL : &vertexArrays[0],
530			  glu::pr::Points(numValues));
531	gl.disable(GL_RASTERIZER_DISCARD);
532	gl.endTransformFeedback();
533	GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
534
535	// Read back data.
536	{
537		const void*	srcPtr		= gl.mapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, outputBufferStride*numValues, GL_MAP_READ_BIT);
538		int			curOffset	= 0; // Offset in buffer in bytes.
539
540		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER)");
541		TCU_CHECK(srcPtr != DE_NULL);
542
543		for (int outputNdx = 0; outputNdx < (int)m_outputs.size(); outputNdx++)
544		{
545			const Symbol&		symbol		= m_outputs[outputNdx];
546			void*				dstPtr		= outputs[outputNdx];
547			const int			scalarSize	= symbol.varType.getScalarSize();
548
549			for (int ndx = 0; ndx < numValues; ndx++)
550				deMemcpy((deUint32*)dstPtr + scalarSize*ndx, (const deUint8*)srcPtr + curOffset + ndx*outputBufferStride, scalarSize*sizeof(deUint32));
551
552			curOffset += scalarSize*sizeof(deUint32);
553		}
554
555		gl.unmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER);
556		GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
557	}
558
559	if (useTFObject)
560		gl.bindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0);
561	gl.bindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
562	GLU_EXPECT_NO_ERROR(gl.getError(), "Restore state");
563}
564
565// VertexShaderExecutor
566
567class VertexShaderExecutor : public VertexProcessorExecutor
568{
569public:
570								VertexShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
571};
572
573VertexShaderExecutor::VertexShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
574	: VertexProcessorExecutor	(renderCtx, shaderSpec,
575								 glu::ProgramSources() << glu::VertexSource(generateVertexShader(shaderSpec))
576													   << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
577{
578}
579
580// GeometryShaderExecutor
581
582class CheckGeomSupport
583{
584public:
585	inline CheckGeomSupport (const glu::RenderContext& renderCtx)
586	{
587		if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES)
588			checkExtension(renderCtx, "GL_EXT_geometry_shader");
589	}
590};
591
592class GeometryShaderExecutor : private CheckGeomSupport, public VertexProcessorExecutor
593{
594public:
595								GeometryShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
596};
597
598GeometryShaderExecutor::GeometryShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
599	: CheckGeomSupport			(renderCtx)
600	, VertexProcessorExecutor	(renderCtx, shaderSpec,
601								 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "", "geom_"))
602													   << glu::GeometrySource(generateGeometryShader(shaderSpec))
603													   << glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
604{
605}
606
607// FragmentShaderExecutor
608
609class FragmentShaderExecutor : public ShaderExecutor
610{
611public:
612								FragmentShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
613								~FragmentShaderExecutor	(void);
614
615	bool						isOk					(void) const				{ return m_program.isOk();			}
616	void						log						(tcu::TestLog& dst) const	{ dst << m_program;					}
617	deUint32					getProgram				(void) const				{ return m_program.getProgram();	}
618
619	void						execute					(int numValues, const void* const* inputs, void* const* outputs);
620
621protected:
622	std::vector<const Symbol*>	m_outLocationSymbols;
623	std::map<std::string, int>	m_outLocationMap;
624	glu::ShaderProgram			m_program;
625};
626
627static std::map<std::string, int> generateLocationMap (const std::vector<Symbol>& symbols, std::vector<const Symbol*>& locationSymbols)
628{
629	std::map<std::string, int>	ret;
630	int							location	= 0;
631
632	locationSymbols.clear();
633
634	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
635	{
636		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
637
638		TCU_CHECK_INTERNAL(!de::contains(ret, it->name));
639		de::insert(ret, it->name, location);
640		location += numLocations;
641
642		for (int ndx = 0; ndx < numLocations; ++ndx)
643			locationSymbols.push_back(&*it);
644	}
645
646	return ret;
647}
648
649inline bool hasFloatRenderTargets (const glu::RenderContext& renderCtx)
650{
651	glu::ContextType type = renderCtx.getType();
652	return glu::isContextTypeGLCore(type);
653}
654
655FragmentShaderExecutor::FragmentShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
656	: ShaderExecutor		(renderCtx, shaderSpec)
657	, m_outLocationSymbols	()
658	, m_outLocationMap		(generateLocationMap(m_outputs, m_outLocationSymbols))
659	, m_program				(renderCtx,
660							 glu::ProgramSources() << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", ""))
661												   << glu::FragmentSource(generateFragmentShader(shaderSpec, !hasFloatRenderTargets(renderCtx), m_outLocationMap)))
662{
663}
664
665FragmentShaderExecutor::~FragmentShaderExecutor (void)
666{
667}
668
669inline int queryInt (const glw::Functions& gl, deUint32 pname)
670{
671	int value = 0;
672	gl.getIntegerv(pname, &value);
673	return value;
674}
675
676static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
677{
678	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
679	{
680		tcu::TextureFormat::R,
681		tcu::TextureFormat::RG,
682		tcu::TextureFormat::RGBA,	// No RGB variants available.
683		tcu::TextureFormat::RGBA
684	};
685
686	const glu::DataType					basicType		= outputType.getBasicType();
687	const int							numComps		= glu::getDataTypeNumComponents(basicType);
688	tcu::TextureFormat::ChannelType		channelType;
689
690	switch (glu::getDataTypeScalarType(basicType))
691	{
692		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;												break;
693		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;													break;
694		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;													break;
695		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;	break;
696		default:
697			throw tcu::InternalError("Invalid output type");
698	}
699
700	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
701
702	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
703}
704
705void FragmentShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
706{
707	const glw::Functions&			gl					= m_renderCtx.getFunctions();
708	const bool						useIntOutputs		= !hasFloatRenderTargets(m_renderCtx);
709	const int						maxRenderbufferSize	= queryInt(gl, GL_MAX_RENDERBUFFER_SIZE);
710	const int						framebufferW		= de::min(maxRenderbufferSize, numValues);
711	const int						framebufferH		= (numValues / framebufferW) + ((numValues % framebufferW != 0) ? 1 : 0);
712
713	glu::Framebuffer				framebuffer			(m_renderCtx);
714	glu::RenderbufferVector			renderbuffers		(m_renderCtx, m_outLocationSymbols.size());
715
716	vector<glu::VertexArrayBinding>	vertexArrays;
717	vector<tcu::Vec2>				positions			(numValues);
718
719	if (framebufferH > maxRenderbufferSize)
720		throw tcu::NotSupportedError("Value count is too high for maximum supported renderbuffer size");
721
722	// Compute positions - 1px points are used to drive fragment shading.
723	for (int valNdx = 0; valNdx < numValues; valNdx++)
724	{
725		const int		ix		= valNdx % framebufferW;
726		const int		iy		= valNdx / framebufferW;
727		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(framebufferW));
728		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(framebufferH));
729
730		positions[valNdx] = tcu::Vec2(fx, fy);
731	}
732
733	// Vertex inputs.
734	vertexArrays.push_back(glu::va::Float("a_position", 2, numValues, 0, (const float*)&positions[0]));
735
736	for (int inputNdx = 0; inputNdx < (int)m_inputs.size(); inputNdx++)
737	{
738		const Symbol&		symbol		= m_inputs[inputNdx];
739		const std::string	attribName	= "a_" + symbol.name;
740		const void*			ptr			= inputs[inputNdx];
741		const glu::DataType	basicType	= symbol.varType.getBasicType();
742		const int			vecSize		= glu::getDataTypeScalarSize(basicType);
743
744		if (glu::isDataTypeFloatOrVec(basicType))
745			vertexArrays.push_back(glu::va::Float(attribName, vecSize, numValues, 0, (const float*)ptr));
746		else if (glu::isDataTypeIntOrIVec(basicType))
747			vertexArrays.push_back(glu::va::Int32(attribName, vecSize, numValues, 0, (const deInt32*)ptr));
748		else if (glu::isDataTypeUintOrUVec(basicType))
749			vertexArrays.push_back(glu::va::Uint32(attribName, vecSize, numValues, 0, (const deUint32*)ptr));
750		else if (glu::isDataTypeMatrix(basicType))
751		{
752			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
753			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
754			int		stride	= numRows * numCols * sizeof(float);
755
756			for (int colNdx = 0; colNdx < numCols; ++colNdx)
757				vertexArrays.push_back(glu::va::Float(attribName, colNdx, numRows, numValues, stride, ((const float*)ptr) + colNdx * numRows));
758		}
759		else
760			DE_ASSERT(false);
761	}
762
763	// Construct framebuffer.
764	gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
765
766	for (int outNdx = 0; outNdx < (int)m_outLocationSymbols.size(); ++outNdx)
767	{
768		const Symbol&	output			= *m_outLocationSymbols[outNdx];
769		const deUint32	renderbuffer	= renderbuffers[outNdx];
770		const deUint32	format			= glu::getInternalFormat(getRenderbufferFormatForOutput(output.varType, useIntOutputs));
771
772		gl.bindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
773		gl.renderbufferStorage(GL_RENDERBUFFER, format, framebufferW, framebufferH);
774		gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0+outNdx, GL_RENDERBUFFER, renderbuffer);
775	}
776	gl.bindRenderbuffer(GL_RENDERBUFFER, 0);
777	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up framebuffer object");
778	TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
779
780	{
781		vector<deUint32> drawBuffers(m_outLocationSymbols.size());
782		for (int ndx = 0; ndx < (int)m_outLocationSymbols.size(); ndx++)
783			drawBuffers[ndx] = GL_COLOR_ATTACHMENT0+ndx;
784		gl.drawBuffers((int)drawBuffers.size(), &drawBuffers[0]);
785		GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawBuffers()");
786	}
787
788	// Render
789	gl.viewport(0, 0, framebufferW, framebufferH);
790	glu::draw(m_renderCtx, m_program.getProgram(), (int)vertexArrays.size(), &vertexArrays[0],
791			  glu::pr::Points(numValues));
792	GLU_EXPECT_NO_ERROR(gl.getError(), "Error in draw");
793
794	// Read back pixels.
795	{
796		tcu::TextureLevel	tmpBuf;
797
798		// \todo [2013-08-07 pyry] Some fast-paths could be added here.
799
800		for (int outNdx = 0; outNdx < (int)m_outputs.size(); ++outNdx)
801		{
802			const Symbol&				output			= m_outputs[outNdx];
803			const int					outSize			= output.varType.getScalarSize();
804			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
805			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
806			deUint32*					dstPtrBase		= static_cast<deUint32*>(outputs[outNdx]);
807			const tcu::TextureFormat	format			= getRenderbufferFormatForOutput(output.varType, useIntOutputs);
808			const tcu::TextureFormat	readFormat		(tcu::TextureFormat::RGBA, format.type);
809			const int					outLocation		= de::lookup(m_outLocationMap, output.name);
810
811			tmpBuf.setStorage(readFormat, framebufferW, framebufferH);
812
813			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
814			{
815				gl.readBuffer(GL_COLOR_ATTACHMENT0 + outLocation + locNdx);
816				glu::readPixels(m_renderCtx, 0, 0, tmpBuf.getAccess());
817				GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels");
818
819				if (outSize == 4 && outNumLocs == 1)
820					deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues*outVecSize*sizeof(deUint32));
821				else
822				{
823					for (int valNdx = 0; valNdx < numValues; valNdx++)
824					{
825						const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx*4;
826						deUint32*		dstPtr = &dstPtrBase[outSize*valNdx + outVecSize*locNdx];
827						deMemcpy(dstPtr, srcPtr, outVecSize*sizeof(deUint32));
828					}
829				}
830			}
831		}
832	}
833
834	// \todo [2013-08-07 pyry] Clear draw buffers & viewport?
835	gl.bindFramebuffer(GL_FRAMEBUFFER, 0);
836}
837
838// Shared utilities for compute and tess executors
839
840static deUint32 getVecStd430ByteAlignment (glu::DataType type)
841{
842	switch (glu::getDataTypeScalarSize(type))
843	{
844		case 1:		return 4u;
845		case 2:		return 8u;
846		case 3:		return 16u;
847		case 4:		return 16u;
848		default:
849			DE_ASSERT(false);
850			return 0u;
851	}
852}
853
854class BufferIoExecutor : public ShaderExecutor
855{
856public:
857						BufferIoExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources);
858						~BufferIoExecutor	(void);
859
860	bool				isOk				(void) const				{ return m_program.isOk();			}
861	void				log					(tcu::TestLog& dst) const	{ dst << m_program;					}
862	deUint32			getProgram			(void) const				{ return m_program.getProgram();	}
863
864protected:
865	enum
866	{
867		INPUT_BUFFER_BINDING	= 0,
868		OUTPUT_BUFFER_BINDING	= 1,
869	};
870
871	void				initBuffers			(int numValues);
872	deUint32			getInputBuffer		(void) const		{ return *m_inputBuffer;					}
873	deUint32			getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
874	deUint32			getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
875	deUint32			getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}
876
877	void				uploadInputBuffer	(const void* const* inputPtrs, int numValues);
878	void				readOutputBuffer	(void* const* outputPtrs, int numValues);
879
880	static void			declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
881	static void			generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
882
883	glu::ShaderProgram	m_program;
884
885private:
886	struct VarLayout
887	{
888		deUint32		offset;
889		deUint32		stride;
890		deUint32		matrixStride;
891
892		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
893	};
894
895	void				resizeInputBuffer	(int newSize);
896	void				resizeOutputBuffer	(int newSize);
897
898	static void			computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
899	static deUint32		getLayoutStride		(const vector<VarLayout>& layout);
900
901	static void			copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
902	static void			copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
903
904	glu::Buffer			m_inputBuffer;
905	glu::Buffer			m_outputBuffer;
906
907	vector<VarLayout>	m_inputLayout;
908	vector<VarLayout>	m_outputLayout;
909};
910
911BufferIoExecutor::BufferIoExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec, const glu::ProgramSources& sources)
912	: ShaderExecutor	(renderCtx, shaderSpec)
913	, m_program			(renderCtx, sources)
914	, m_inputBuffer		(renderCtx)
915	, m_outputBuffer	(renderCtx)
916{
917	computeVarLayout(m_inputs, &m_inputLayout);
918	computeVarLayout(m_outputs, &m_outputLayout);
919}
920
921BufferIoExecutor::~BufferIoExecutor (void)
922{
923}
924
925void BufferIoExecutor::resizeInputBuffer (int newSize)
926{
927	const glw::Functions& gl = m_renderCtx.getFunctions();
928	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_inputBuffer);
929	gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
930	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate input buffer");
931}
932
933void BufferIoExecutor::resizeOutputBuffer (int newSize)
934{
935	const glw::Functions& gl = m_renderCtx.getFunctions();
936	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *m_outputBuffer);
937	gl.bufferData(GL_SHADER_STORAGE_BUFFER, newSize, DE_NULL, GL_STATIC_DRAW);
938	GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to allocate output buffer");
939}
940
941void BufferIoExecutor::initBuffers (int numValues)
942{
943	const deUint32		inputStride			= getLayoutStride(m_inputLayout);
944	const deUint32		outputStride		= getLayoutStride(m_outputLayout);
945	const int			inputBufferSize		= numValues * inputStride;
946	const int			outputBufferSize	= numValues * outputStride;
947
948	resizeInputBuffer(inputBufferSize);
949	resizeOutputBuffer(outputBufferSize);
950}
951
952void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
953{
954	deUint32	maxAlignment	= 0;
955	deUint32	curOffset		= 0;
956
957	DE_ASSERT(layout->empty());
958	layout->resize(symbols.size());
959
960	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
961	{
962		const Symbol&		symbol		= symbols[varNdx];
963		const glu::DataType	basicType	= symbol.varType.getBasicType();
964		VarLayout&			layoutEntry	= (*layout)[varNdx];
965
966		if (glu::isDataTypeScalarOrVector(basicType))
967		{
968			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
969			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType)*sizeof(deUint32);
970
971			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
972			maxAlignment	= de::max(maxAlignment, alignment);
973
974			layoutEntry.offset			= curOffset;
975			layoutEntry.matrixStride	= 0;
976
977			curOffset += size;
978		}
979		else if (glu::isDataTypeMatrix(basicType))
980		{
981			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
982			const glu::DataType		vecType			= glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
983			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);
984
985			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
986			maxAlignment	= de::max(maxAlignment, vecAlignment);
987
988			layoutEntry.offset			= curOffset;
989			layoutEntry.matrixStride	= vecAlignment;
990
991			curOffset += vecAlignment*numVecs;
992		}
993		else
994			DE_ASSERT(false);
995	}
996
997	{
998		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);
999
1000		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1001			varIter->stride = totalSize;
1002	}
1003}
1004
1005inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1006{
1007	return layout.empty() ? 0 : layout[0].stride;
1008}
1009
1010void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1011{
1012	if (varType.isBasicType())
1013	{
1014		const glu::DataType		basicType		= varType.getBasicType();
1015		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1016		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1017		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1018		const int				numComps		= scalarSize / numVecs;
1019
1020		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1021		{
1022			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1023			{
1024				const int		srcOffset		= sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1025				const int		dstOffset		= layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1026				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1027				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1028
1029				deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1030			}
1031		}
1032	}
1033	else
1034		throw tcu::InternalError("Unsupported type");
1035}
1036
1037void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1038{
1039	if (varType.isBasicType())
1040	{
1041		const glu::DataType		basicType		= varType.getBasicType();
1042		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1043		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1044		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1045		const int				numComps		= scalarSize / numVecs;
1046
1047		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1048		{
1049			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1050			{
1051				const int		srcOffset		= layout.offset + layout.stride*elemNdx + (isMatrix ? layout.matrixStride*vecNdx : 0);
1052				const int		dstOffset		= sizeof(deUint32)*(elemNdx*scalarSize + vecNdx*numComps);
1053				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1054				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1055
1056				deMemcpy(dstPtr, srcPtr, sizeof(deUint32)*numComps);
1057			}
1058		}
1059	}
1060	else
1061		throw tcu::InternalError("Unsupported type");
1062}
1063
1064void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1065{
1066	const glw::Functions&	gl				= m_renderCtx.getFunctions();
1067	const deUint32			buffer			= *m_inputBuffer;
1068	const deUint32			inputStride		= getLayoutStride(m_inputLayout);
1069	const int				inputBufferSize	= inputStride*numValues;
1070
1071	if (inputBufferSize == 0)
1072		return; // No inputs
1073
1074	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1075	void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, inputBufferSize, GL_MAP_WRITE_BIT);
1076	GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1077	TCU_CHECK(mapPtr);
1078
1079	try
1080	{
1081		DE_ASSERT(m_inputs.size() == m_inputLayout.size());
1082		for (size_t inputNdx = 0; inputNdx < m_inputs.size(); ++inputNdx)
1083		{
1084			const glu::VarType&		varType		= m_inputs[inputNdx].varType;
1085			const VarLayout&		layout		= m_inputLayout[inputNdx];
1086
1087			copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], mapPtr);
1088		}
1089	}
1090	catch (...)
1091	{
1092		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1093		throw;
1094	}
1095
1096	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1097	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1098}
1099
1100void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1101{
1102	const glw::Functions&	gl					= m_renderCtx.getFunctions();
1103	const deUint32			buffer				= *m_outputBuffer;
1104	const deUint32			outputStride		= getLayoutStride(m_outputLayout);
1105	const int				outputBufferSize	= numValues*outputStride;
1106
1107	DE_ASSERT(outputBufferSize > 0); // At least some outputs are required.
1108
1109	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
1110	void* mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, outputBufferSize, GL_MAP_READ_BIT);
1111	GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
1112	TCU_CHECK(mapPtr);
1113
1114	try
1115	{
1116		DE_ASSERT(m_outputs.size() == m_outputLayout.size());
1117		for (size_t outputNdx = 0; outputNdx < m_outputs.size(); ++outputNdx)
1118		{
1119			const glu::VarType&		varType		= m_outputs[outputNdx].varType;
1120			const VarLayout&		layout		= m_outputLayout[outputNdx];
1121
1122			copyFromBuffer(varType, layout, numValues, mapPtr, outputPtrs[outputNdx]);
1123		}
1124	}
1125	catch (...)
1126	{
1127		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1128		throw;
1129	}
1130
1131	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
1132	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
1133}
1134
1135void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1136{
1137	// Input struct
1138	if (!spec.inputs.empty())
1139	{
1140		glu::StructType inputStruct("Inputs");
1141		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1142			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1143		src << glu::declare(&inputStruct) << ";\n";
1144	}
1145
1146	// Output struct
1147	{
1148		glu::StructType outputStruct("Outputs");
1149		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1150			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1151		src << glu::declare(&outputStruct) << ";\n";
1152	}
1153
1154	src << "\n";
1155
1156	if (!spec.inputs.empty())
1157	{
1158		src	<< "layout(binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1159			<< "{\n"
1160			<< "	Inputs inputs[];\n"
1161			<< "};\n";
1162	}
1163
1164	src	<< "layout(binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1165		<< "{\n"
1166		<< "	Outputs outputs[];\n"
1167		<< "};\n"
1168		<< "\n";
1169}
1170
1171void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1172{
1173	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1174		src << "\t" << glu::declare(symIter->varType, symIter->name) << " = inputs[" << invocationNdxName << "]." << symIter->name << ";\n";
1175
1176	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1177		src << "\t" << glu::declare(symIter->varType, symIter->name) << ";\n";
1178
1179	src << "\n";
1180
1181	{
1182		std::istringstream	opSrc	(spec.source);
1183		std::string			line;
1184
1185		while (std::getline(opSrc, line))
1186			src << "\t" << line << "\n";
1187	}
1188
1189	src << "\n";
1190	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1191		src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1192}
1193
1194// ComputeShaderExecutor
1195
1196class ComputeShaderExecutor : public BufferIoExecutor
1197{
1198public:
1199						ComputeShaderExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1200						~ComputeShaderExecutor	(void);
1201
1202	void				execute					(int numValues, const void* const* inputs, void* const* outputs);
1203
1204protected:
1205	static std::string	generateComputeShader	(const ShaderSpec& spec);
1206
1207	tcu::IVec3			m_maxWorkSize;
1208};
1209
1210std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
1211{
1212	std::ostringstream src;
1213
1214	src << glu::getGLSLVersionDeclaration(spec.version) << "\n";
1215
1216	if (!spec.globalDeclarations.empty())
1217		src << spec.globalDeclarations << "\n";
1218
1219	src << "layout(local_size_x = 1) in;\n"
1220		<< "\n";
1221
1222	declareBufferBlocks(src, spec);
1223
1224	src << "void main (void)\n"
1225		<< "{\n"
1226		<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
1227		<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
1228
1229	generateExecBufferIo(src, spec, "invocationNdx");
1230
1231	src << "}\n";
1232
1233	return src.str();
1234}
1235
1236ComputeShaderExecutor::ComputeShaderExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1237	: BufferIoExecutor	(renderCtx, shaderSpec,
1238						 glu::ProgramSources() << glu::ComputeSource(generateComputeShader(shaderSpec)))
1239{
1240	m_maxWorkSize	= tcu::IVec3(128,128,64); // Minimum in 3plus
1241}
1242
1243ComputeShaderExecutor::~ComputeShaderExecutor (void)
1244{
1245}
1246
1247void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1248{
1249	const glw::Functions&	gl						= m_renderCtx.getFunctions();
1250	const int				maxValuesPerInvocation	= m_maxWorkSize[0];
1251	const deUint32			inputStride				= getInputStride();
1252	const deUint32			outputStride			= getOutputStride();
1253
1254	initBuffers(numValues);
1255
1256	// Setup input buffer & copy data
1257	uploadInputBuffer(inputs, numValues);
1258
1259	// Perform compute invocations
1260	{
1261		int curOffset = 0;
1262		while (curOffset < numValues)
1263		{
1264			const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
1265
1266			if (inputStride > 0)
1267				gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer(), curOffset*inputStride, numToExec*inputStride);
1268
1269			gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer(), curOffset*outputStride, numToExec*outputStride);
1270			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferRange(GL_SHADER_STORAGE_BUFFER)");
1271
1272			gl.dispatchCompute(numToExec, 1, 1);
1273			GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
1274
1275			curOffset += numToExec;
1276		}
1277	}
1278
1279	// Read back data
1280	readOutputBuffer(outputs, numValues);
1281}
1282
1283// Tessellation utils
1284
1285static std::string generateVertexShaderForTess (glu::GLSLVersion version)
1286{
1287	std::ostringstream	src;
1288
1289	src << glu::getGLSLVersionDeclaration(version) << "\n";
1290
1291	src << "void main (void)\n{\n"
1292		<< "	gl_Position = vec4(gl_VertexID/2, gl_VertexID%2, 0.0, 1.0);\n"
1293		<< "}\n";
1294
1295	return src.str();
1296}
1297
1298class CheckTessSupport
1299{
1300public:
1301	enum Stage
1302	{
1303		STAGE_CONTROL = 0,
1304		STAGE_EVAL,
1305	};
1306
1307	inline CheckTessSupport (const glu::RenderContext& renderCtx, Stage stage)
1308	{
1309		const int numBlockRequired = 2; // highest binding is always 1 (output) i.e. count == 2
1310
1311		if (renderCtx.getType().getAPI().getProfile() == glu::PROFILE_ES)
1312			checkExtension(renderCtx, "GL_EXT_tessellation_shader");
1313
1314		if (stage == STAGE_CONTROL)
1315			checkLimit(renderCtx, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, numBlockRequired);
1316		else if (stage == STAGE_EVAL)
1317			checkLimit(renderCtx, GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, numBlockRequired);
1318		else
1319			DE_ASSERT(false);
1320	}
1321};
1322
1323// TessControlExecutor
1324
1325class TessControlExecutor : private CheckTessSupport, public BufferIoExecutor
1326{
1327public:
1328						TessControlExecutor			(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1329						~TessControlExecutor		(void);
1330
1331	void				execute						(int numValues, const void* const* inputs, void* const* outputs);
1332
1333protected:
1334	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
1335};
1336
1337std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
1338{
1339	std::ostringstream src;
1340
1341	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1342
1343	if (shaderSpec.version == glu::GLSL_VERSION_310_ES)
1344		src << "#extension GL_EXT_tessellation_shader : require\n";
1345
1346	if (!shaderSpec.globalDeclarations.empty())
1347		src << shaderSpec.globalDeclarations << "\n";
1348
1349	src << "\nlayout(vertices = 1) out;\n\n";
1350
1351	declareBufferBlocks(src, shaderSpec);
1352
1353	src << "void main (void)\n{\n";
1354
1355	for (int ndx = 0; ndx < 2; ndx++)
1356		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1357
1358	for (int ndx = 0; ndx < 4; ndx++)
1359		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1360
1361	src << "\n"
1362		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
1363
1364	generateExecBufferIo(src, shaderSpec, "invocationId");
1365
1366	src << "}\n";
1367
1368	return src.str();
1369}
1370
1371static std::string generateEmptyTessEvalShader (glu::GLSLVersion version)
1372{
1373	std::ostringstream src;
1374
1375	src << glu::getGLSLVersionDeclaration(version) << "\n";
1376
1377	if (version == glu::GLSL_VERSION_310_ES)
1378		src << "#extension GL_EXT_tessellation_shader : require\n\n";
1379
1380	src << "layout(triangles, ccw) in;\n";
1381
1382	src << "\nvoid main (void)\n{\n"
1383		<< "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
1384		<< "}\n";
1385
1386	return src.str();
1387}
1388
1389TessControlExecutor::TessControlExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1390	: CheckTessSupport	(renderCtx, STAGE_CONTROL)
1391	, BufferIoExecutor	(renderCtx, shaderSpec, glu::ProgramSources()
1392							<< glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1393							<< glu::TessellationControlSource(generateTessControlShader(shaderSpec))
1394							<< glu::TessellationEvaluationSource(generateEmptyTessEvalShader(shaderSpec.version))
1395							<< glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1396{
1397}
1398
1399TessControlExecutor::~TessControlExecutor (void)
1400{
1401}
1402
1403void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1404{
1405	const glw::Functions&	gl	= m_renderCtx.getFunctions();
1406
1407	initBuffers(numValues);
1408
1409	// Setup input buffer & copy data
1410	uploadInputBuffer(inputs, numValues);
1411
1412	if (!m_inputs.empty())
1413		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1414
1415	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1416
1417	// Render patches
1418	gl.patchParameteri(GL_PATCH_VERTICES, 3);
1419	gl.drawArrays(GL_PATCHES, 0, 3*numValues);
1420
1421	// Read back data
1422	readOutputBuffer(outputs, numValues);
1423}
1424
1425// TessEvaluationExecutor
1426
1427class TessEvaluationExecutor : private CheckTessSupport, public BufferIoExecutor
1428{
1429public:
1430						TessEvaluationExecutor	(const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec);
1431						~TessEvaluationExecutor	(void);
1432
1433	void				execute					(int numValues, const void* const* inputs, void* const* outputs);
1434
1435protected:
1436	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
1437};
1438
1439static std::string generatePassthroughTessControlShader (glu::GLSLVersion version)
1440{
1441	std::ostringstream src;
1442
1443	src << glu::getGLSLVersionDeclaration(version) << "\n";
1444
1445	if (version == glu::GLSL_VERSION_310_ES)
1446		src << "#extension GL_EXT_tessellation_shader : require\n\n";
1447
1448	src << "layout(vertices = 1) out;\n\n";
1449
1450	src << "void main (void)\n{\n";
1451
1452	for (int ndx = 0; ndx < 2; ndx++)
1453		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
1454
1455	for (int ndx = 0; ndx < 4; ndx++)
1456		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
1457
1458	src << "}\n";
1459
1460	return src.str();
1461}
1462
1463std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
1464{
1465	std::ostringstream src;
1466
1467	src << glu::getGLSLVersionDeclaration(shaderSpec.version) << "\n";
1468
1469	if (shaderSpec.version == glu::GLSL_VERSION_310_ES)
1470		src << "#extension GL_EXT_tessellation_shader : require\n";
1471
1472	if (!shaderSpec.globalDeclarations.empty())
1473		src << shaderSpec.globalDeclarations << "\n";
1474
1475	src << "\n";
1476
1477	src << "layout(isolines, equal_spacing) in;\n\n";
1478
1479	declareBufferBlocks(src, shaderSpec);
1480
1481	src << "void main (void)\n{\n"
1482		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
1483		<< "\thighp uint invocationId = uint(gl_PrimitiveID) + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
1484
1485	generateExecBufferIo(src, shaderSpec, "invocationId");
1486
1487	src	<< "}\n";
1488
1489	return src.str();
1490}
1491
1492TessEvaluationExecutor::TessEvaluationExecutor (const glu::RenderContext& renderCtx, const ShaderSpec& shaderSpec)
1493	: CheckTessSupport	(renderCtx, STAGE_EVAL)
1494	, BufferIoExecutor	(renderCtx, shaderSpec, glu::ProgramSources()
1495							<< glu::VertexSource(generateVertexShaderForTess(shaderSpec.version))
1496							<< glu::TessellationControlSource(generatePassthroughTessControlShader(shaderSpec.version))
1497							<< glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec))
1498							<< glu::FragmentSource(generateEmptyFragmentSource(shaderSpec.version)))
1499{
1500}
1501
1502TessEvaluationExecutor::~TessEvaluationExecutor (void)
1503{
1504}
1505
1506void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs)
1507{
1508	const glw::Functions&	gl				= m_renderCtx.getFunctions();
1509	const int				alignedValues	= deAlign32(numValues, 2);
1510
1511	// Initialize buffers with aligned value count to make room for padding
1512	initBuffers(alignedValues);
1513
1514	// Setup input buffer & copy data
1515	uploadInputBuffer(inputs, numValues);
1516
1517	// \todo [2014-06-26 pyry] Duplicate last value in the buffer to prevent infinite loops for example?
1518
1519	if (!m_inputs.empty())
1520		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BUFFER_BINDING, getInputBuffer());
1521
1522	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BUFFER_BINDING, getOutputBuffer());
1523
1524	// Render patches
1525	gl.patchParameteri(GL_PATCH_VERTICES, 2);
1526	gl.drawArrays(GL_PATCHES, 0, 2*alignedValues);
1527
1528	// Read back data
1529	readOutputBuffer(outputs, numValues);
1530}
1531
1532// Utilities
1533
1534ShaderExecutor* createExecutor (const glu::RenderContext& renderCtx, glu::ShaderType shaderType, const ShaderSpec& shaderSpec)
1535{
1536	switch (shaderType)
1537	{
1538		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(renderCtx, shaderSpec);
1539		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(renderCtx, shaderSpec);
1540		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(renderCtx, shaderSpec);
1541		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(renderCtx, shaderSpec);
1542		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(renderCtx, shaderSpec);
1543		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(renderCtx, shaderSpec);
1544		default:
1545			throw tcu::InternalError("Unsupported shader type");
1546	}
1547}
1548
1549} // ShaderExecUtil
1550} // gls
1551} // deqp
1552