1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Depth buffer performance tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es3pDepthTests.hpp"
25
26#include "glsCalibration.hpp"
27
28#include "gluShaderProgram.hpp"
29#include "gluObjectWrapper.hpp"
30#include "gluPixelTransfer.hpp"
31
32#include "glwFunctions.hpp"
33#include "glwEnums.hpp"
34
35#include "tcuTestLog.hpp"
36#include "tcuStringTemplate.hpp"
37#include "tcuCPUWarmup.hpp"
38#include "tcuCommandLine.hpp"
39#include "tcuResultCollector.hpp"
40
41#include "deClock.h"
42#include "deString.h"
43#include "deMath.h"
44#include "deStringUtil.hpp"
45#include "deRandom.hpp"
46#include "deUniquePtr.hpp"
47
48#include <vector>
49#include <algorithm>
50
51namespace deqp
52{
53namespace gles3
54{
55namespace Performance
56{
57namespace
58{
59using namespace glw;
60using de::MovePtr;
61using tcu::TestContext;
62using tcu::TestLog;
63using tcu::Vec4;
64using tcu::Vec3;
65using tcu::Vec2;
66using glu::RenderContext;
67using glu::ProgramSources;
68using glu::ShaderSource;
69using std::vector;
70using std::string;
71using std::map;
72
73struct Sample
74{
75	deInt64	nullTime;
76	deInt64	baseTime;
77	deInt64	testTime;
78	int		order;
79	int		workload;
80};
81
// Plain (step, measurement) index pair.
struct SampleParams
{
	int step;
	int measurement;

	SampleParams (int step_, int measurement_)
		: step			(step_)
		, measurement	(measurement_)
	{
	}
};
89
90typedef vector<float> Geometry;
91
92struct ObjectData
93{
94	ProgramSources	shader;
95	Geometry		geometry;
96
97	ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {}
98};
99
100class RenderData
101{
102public:
103								RenderData		(const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log);
104								~RenderData		(void) {};
105
106	const glu::ShaderProgram	m_program;
107	const glu::VertexArray		m_vao;
108	const glu::Buffer			m_vbo;
109
110	const int					m_numVertices;
111};
112
113RenderData::RenderData (const ObjectData& object, const  glu::RenderContext& renderCtx, TestLog& log)
114	: m_program		(renderCtx, object.shader)
115	, m_vao			(renderCtx.getFunctions())
116	, m_vbo			(renderCtx.getFunctions())
117	, m_numVertices	(int(object.geometry.size())/4)
118{
119	const glw::Functions& gl = renderCtx.getFunctions();
120
121	if (!m_program.isOk())
122		log << m_program;
123
124	gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo);
125	gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW);
126	gl.bindAttribLocation(m_program.getProgram(), 0, "a_position");
127
128	gl.bindVertexArray(*m_vao);
129	gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
130	gl.enableVertexAttribArray(0);
131	gl.bindVertexArray(0);
132}
133
134namespace Utils
135{
136	vector<float> getFullscreenQuad (float depth)
137	{
138		const float data[] =
139		{
140			+1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own
141			+1.0f, -1.0f, depth, 1.0f,
142			-1.0f, -1.0f, depth, 2.0f,
143			-1.0f, -1.0f, depth, 0.0f,
144			-1.0f, +1.0f, depth, 1.0f,
145			+1.0f, +1.0f, depth, 2.0f,
146		};
147
148		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
149	}
150
151	vector<float> getFullscreenQuadWithGradient (float depth0, float depth1)
152	{
153		const float data[] =
154		{
155			+1.0f, +1.0f, depth0, 0.0f,
156			+1.0f, -1.0f, depth0, 1.0f,
157			-1.0f, -1.0f, depth1, 2.0f,
158			-1.0f, -1.0f, depth1, 0.0f,
159			-1.0f, +1.0f, depth1, 1.0f,
160			+1.0f, +1.0f, depth0, 2.0f,
161		};
162
163		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
164	}
165
166	vector<float> getPartScreenQuad (float coverage, float depth)
167	{
168		const float xMax	= -1.0f + 2.0f*coverage;
169		const float data[]	=
170		{
171			 xMax, +1.0f, depth, 0.0f,
172			 xMax, -1.0f, depth, 1.0f,
173			-1.0f, -1.0f, depth, 2.0f,
174			-1.0f, -1.0f, depth, 0.0f,
175			-1.0f, +1.0f, depth, 1.0f,
176			 xMax, +1.0f, depth, 2.0f,
177		};
178
179		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
180	}
181
182	// Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise
183	vector<float> getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise)
184	{
185		const int		gridsize	= resolution+1;
186		vector<Vec3>	vertices	(gridsize*gridsize);
187		vector<float>	retval;
188		de::Random		rng			(seed);
189
190		for (int y = 0; y < gridsize; y++)
191		for (int x = 0; x < gridsize; x++)
192		{
193			const bool	isEdge	= x == 0 || y == 0 || x == resolution || y == resolution;
194			const float x_		= float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
195			const float y_		= float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
196			const float z_		= baseDepth + rng.getFloat(-depthNoise, +depthNoise);
197
198			vertices[y*gridsize + x] = Vec3(x_, y_, z_);
199		}
200
201		retval.reserve(resolution*resolution*6);
202
203		for (int y = 0; y < resolution; y++)
204		for (int x = 0; x < resolution; x++)
205		{
206			const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)];
207			const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)];
208			const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)];
209			const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)];
210
211			const float temp[6*4] =
212			{
213				p0.x(), p0.y(), p0.z(), 0.0f,
214				p2.x(), p2.y(), p2.z(), 1.0f,
215				p1.x(), p1.y(), p1.z(), 2.0f,
216
217				p3.x(), p3.y(), p3.z(), 0.0f,
218				p1.x(), p1.y(), p1.z(), 1.0f,
219				p2.x(), p2.y(), p2.z(), 2.0f,
220			};
221
222			retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp));
223		}
224
225		return retval;
226	}
227
228	// Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader
229	string getBaseVertexShader (void)
230	{
231		return "#version 300 es\n"
232				"in highp vec4 a_position;\n"
233				"out mediump vec3 v_bcoords;\n"
234				"void main()\n"
235				"{\n"
236				"	v_bcoords = vec3(0, 0, 0);\n"
237				"	v_bcoords[int(a_position.w)] = 1.0;\n"
238				"	gl_Position = vec4(a_position.xyz, 1.0);\n"
239				"}\n";
240	}
241
242	// Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords
243	string getInstanceNoiseVertexShader (void)
244	{
245		return "#version 300 es\n"
246				"in highp vec4 a_position;\n"
247				"out mediump vec3 v_bcoords;\n"
248				"void main()\n"
249				"{\n"
250				"	v_bcoords = vec3(0, 0, 0);\n"
251				"	v_bcoords[int(a_position.w)] = 1.0;\n"
252				"	vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n"
253				"	gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n"
254				"}\n";
255	}
256
257	// Renders green triangles with edges highlighted. Exact shade depends on depth.
258	string getDepthAsGreenFragmentShader (void)
259	{
260		return	"#version 300 es\n"
261				"in mediump vec3 v_bcoords;\n"
262				"out mediump vec4 fragColor;\n"
263				"void main()\n"
264				"{\n"
265				"	mediump float d = gl_FragCoord.z;\n"
266				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
267				"		fragColor = vec4(d,1,d,1);\n"
268				"	else\n"
269				"		fragColor = vec4(0,d,0,1);\n"
270				"}\n";
271	}
272
273	// Renders green triangles with edges highlighted. Exact shade depends on depth.
274	string getDepthAsRedFragmentShader (void)
275	{
276		return	"#version 300 es\n"
277				"in mediump vec3 v_bcoords;\n"
278				"out mediump vec4 fragColor;\n"
279				"void main()\n"
280				"{\n"
281				"	mediump float d = gl_FragCoord.z;\n"
282				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
283				"		fragColor = vec4(1,d,d,1);\n"
284				"	else\n"
285				"		fragColor = vec4(d,0,0,1);\n"
286				"}\n";
287	}
288
289	// Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
290	string getArithmeticWorkloadFragmentShader (void)
291	{
292
293		return	"#version 300 es\n"
294				"in mediump vec3 v_bcoords;\n"
295				"out mediump vec4 fragColor;\n"
296				"uniform mediump int u_iterations;\n"
297				"void main()\n"
298				"{\n"
299				"	mediump float d = gl_FragCoord.z;\n"
300				"	for (int i = 0; i<u_iterations; i++)\n"
301				// cos(a)^2 + sin(a)^2 == 1. since d is in range [0,1] this will lose a few ULP's of precision per iteration but should not significantly change the value of d without extreme iteration counts
302				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
303				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
304				"		fragColor = vec4(1,d,d,1);\n"
305				"	else\n"
306				"		fragColor = vec4(d,0,0,1);\n"
307				"}\n";
308	}
309
310	// Arithmetic workload shader but contains discard
311	string getArithmeticWorkloadDiscardFragmentShader (void)
312	{
313		return	"#version 300 es\n"
314				"in mediump vec3 v_bcoords;\n"
315				"out mediump vec4 fragColor;\n"
316				"uniform mediump int u_iterations;\n"
317				"void main()\n"
318				"{\n"
319				"	mediump float d = gl_FragCoord.z;\n"
320				"	for (int i = 0; i<u_iterations; i++)\n"
321				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
322				"	if (d < 0.5) discard;\n"
323				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
324				"		fragColor = vec4(1,d,d,1);\n"
325				"	else\n"
326				"		fragColor = vec4(d,0,0,1);\n"
327				"}\n";
328	}
329
330	// Texture fetch based time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
331	string getTextureWorkloadFragmentShader (void)
332	{
333		return  "#version 300 es\n"
334				"in mediump vec3 v_bcoords;\n"
335				"out mediump vec4 fragColor;\n"
336				"uniform mediump int u_iterations;\n"
337				"uniform sampler2D u_texture;\n"
338				"void main()\n"
339				"{\n"
340				"	mediump float d = gl_FragCoord.z;\n"
341				"	for (int i = 0; i<u_iterations; i++)\n"
342				"		d *= texture(u_texture, (gl_FragCoord.xy+vec2(i))/512.0).r;\n" // Texture is expected to be fully white
343				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
344				"		fragColor = vec4(1,1,1,1);\n"
345				"	else\n"
346				"		fragColor = vec4(d,0,0,1);\n"
347				"}\n";
348	}
349
350	// Discard fragments in a grid pattern
351	string getGridDiscardFragmentShader (int gridsize)
352	{
353		const string		fragSrc = "#version 300 es\n"
354									  "in mediump vec3 v_bcoords;\n"
355									  "out mediump vec4 fragColor;\n"
356									  "void main()\n"
357									  "{\n"
358									  "	mediump float d = gl_FragCoord.z;\n"
359									  "	if ((int(gl_FragCoord.x)/${GRIDRENDER_SIZE} + int(gl_FragCoord.y)/${GRIDRENDER_SIZE})%2 == 0)\n"
360									  "		discard;\n"
361									  "	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
362									  "		fragColor = vec4(d,1,d,1);\n"
363									  "	else\n"
364									  "		fragColor = vec4(0,d,0,1);\n"
365									  "}\n";
366		map<string, string>	params;
367
368		params["GRIDRENDER_SIZE"] = de::toString(gridsize);
369
370		return tcu::StringTemplate(fragSrc).specialize(params);
371	}
372
373	// A static increment to frag depth
374	string getStaticFragDepthFragmentShader (void)
375	{
376		return	"#version 300 es\n"
377				"in mediump vec3 v_bcoords;\n"
378				"out mediump vec4 fragColor;\n"
379				"void main()\n"
380				"{\n"
381				"	mediump float d = gl_FragCoord.z;\n"
382				"	gl_FragDepth = gl_FragCoord.z + 0.1;\n"
383				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
384				"		fragColor = vec4(d,1,d,1);\n"
385				"	else\n"
386				"		fragColor = vec4(0,d,0,1);\n"
387				"}\n";
388	}
389
390	// A trivial dynamic change to frag depth
391	string getDynamicFragDepthFragmentShader (void)
392	{
393		return	"#version 300 es\n"
394				"in mediump vec3 v_bcoords;\n"
395				"out mediump vec4 fragColor;\n"
396				"void main()\n"
397				"{\n"
398				"	mediump float d = gl_FragCoord.z;\n"
399				"	gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
400				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
401				"		fragColor = vec4(d,1,d,1);\n"
402				"	else\n"
403				"		fragColor = vec4(0,d,0,1);\n"
404				"}\n";
405	}
406
407	// A static increment to frag depth
408	string getStaticFragDepthArithmeticWorkloadFragmentShader (void)
409	{
410		return	"#version 300 es\n"
411				"in mediump vec3 v_bcoords;\n"
412				"out mediump vec4 fragColor;\n"
413				"uniform mediump int u_iterations;\n"
414				"void main()\n"
415				"{\n"
416				"	mediump float d = gl_FragCoord.z;\n"
417				"	gl_FragDepth = gl_FragCoord.z + 0.1;\n"
418				"	for (int i = 0; i<u_iterations; i++)\n"
419				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
420				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
421				"		fragColor = vec4(1,d,d,1);\n"
422				"	else\n"
423				"		fragColor = vec4(d,0,0,1);\n"
424				"}\n";
425	}
426
427	// A trivial dynamic change to frag depth
428	string getDynamicFragDepthArithmeticWorkloadFragmentShader (void)
429	{
430		return	"#version 300 es\n"
431				"in mediump vec3 v_bcoords;\n"
432				"out mediump vec4 fragColor;\n"
433				"uniform mediump int u_iterations;\n"
434				"void main()\n"
435				"{\n"
436				"	mediump float d = gl_FragCoord.z;\n"
437				"	gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
438				"	for (int i = 0; i<u_iterations; i++)\n"
439				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
440				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
441				"		fragColor = vec4(1,d,d,1);\n"
442				"	else\n"
443				"		fragColor = vec4(d,0,0,1);\n"
444				"}\n";
445	}
446
447	glu::ProgramSources getBaseShader (void)
448	{
449		return glu::makeVtxFragSources(getBaseVertexShader(), getDepthAsGreenFragmentShader());
450	}
451
452	glu::ProgramSources getArithmeticWorkloadShader (void)
453	{
454		return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadFragmentShader());
455	}
456
457	glu::ProgramSources getArithmeticWorkloadDiscardShader (void)
458	{
459		return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadDiscardFragmentShader());
460	}
461
462	glu::ProgramSources getTextureWorkloadShader (void)
463	{
464		return glu::makeVtxFragSources(getBaseVertexShader(), getTextureWorkloadFragmentShader());
465	}
466
467	glu::ProgramSources getGridDiscardShader (int gridsize)
468	{
469		return glu::makeVtxFragSources(getBaseVertexShader(), getGridDiscardFragmentShader(gridsize));
470	}
471
472	inline ObjectData quadWith (const glu::ProgramSources& shader, float depth)
473	{
474		return ObjectData(shader, getFullscreenQuad(depth));
475	}
476
477	inline ObjectData quadWith (const string& fragShader, float depth)
478	{
479		return ObjectData(glu::makeVtxFragSources(getBaseVertexShader(), fragShader), getFullscreenQuad(depth));
480	}
481
482	inline ObjectData variableQuad (float depth)
483	{
484		return ObjectData(glu::makeVtxFragSources(getInstanceNoiseVertexShader(), getDepthAsRedFragmentShader()), getFullscreenQuad(depth));
485	}
486
487	inline ObjectData fastQuad (float depth)
488	{
489		return ObjectData(getBaseShader(), getFullscreenQuad(depth));
490	}
491
492	inline ObjectData slowQuad (float depth)
493	{
494		return ObjectData(getArithmeticWorkloadShader(), getFullscreenQuad(depth));
495	}
496
497	inline ObjectData fastQuadWithGradient (float depth0, float depth1)
498	{
499		return ObjectData(getBaseShader(), getFullscreenQuadWithGradient(depth0, depth1));
500	}
501} // Utils
502
503// Shared base
504class BaseCase : public tcu::TestCase
505{
506public:
507	enum {RENDER_SIZE = 512};
508
509							BaseCase			(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
510	virtual					~BaseCase			(void) {}
511
512	virtual IterateResult	iterate				(void);
513
514protected:
515	void					logSamples			(const vector<Sample>& samples, const string& name, const string& desc);
516	void					logGeometry			(const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg);
517	virtual void			logAnalysis			(const vector<Sample>& samples) = 0;
518	virtual void			logDescription		(void) = 0;
519
520	virtual ObjectData		genOccluderGeometry	(void) const = 0;
521	virtual ObjectData		genOccludedGeometry	(void) const = 0;
522
523	virtual int				calibrate			(void) const = 0;
524	virtual Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const = 0;
525
526	void					render				(const RenderData& data) const;
527	void					render				(const RenderData& data, int instances) const;
528
529	const RenderContext&	m_renderCtx;
530	tcu::ResultCollector	m_results;
531
532	enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16};
533};
534
535BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
536	: TestCase		(testCtx, tcu::NODETYPE_PERFORMANCE, name, desc)
537	, m_renderCtx	(renderCtx)
538{
539}
540
541BaseCase::IterateResult BaseCase::iterate (void)
542{
543	typedef de::MovePtr<RenderData> RenderDataP;
544
545	const glw::Functions&	gl					= m_renderCtx.getFunctions();
546	TestLog&				log					= m_testCtx.getLog();
547
548	const glu::Framebuffer	framebuffer			(gl);
549	const glu::Renderbuffer	renderbuffer		(gl);
550	const glu::Renderbuffer	depthbuffer			(gl);
551
552	vector<Sample>			results;
553	vector<int>				params;
554	RenderDataP				occluderData;
555	RenderDataP				occludedData;
556	tcu::TextureLevel		resultTex			(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE);
557	int						maxWorkload			= 0;
558	de::Random				rng					(deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed());
559
560	logDescription();
561
562	gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer);
563	gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE);
564	gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer);
565	gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE);
566
567	gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
568	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer);
569	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer);
570	gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE);
571	gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f);
572
573	maxWorkload = calibrate();
574
575	// Setup data
576	occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log));
577	occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log));
578
579	TCU_CHECK(occluderData->m_program.isOk());
580	TCU_CHECK(occludedData->m_program.isOk());
581
582	// Force initialization of GPU resources
583	gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
584	gl.enable(GL_DEPTH_TEST);
585
586	render(*occluderData);
587	render(*occludedData);
588	glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess());
589
590	logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program);
591
592	params.reserve(ITERATION_STEPS*ITERATION_SAMPLES);
593
594	// Setup parameters
595	for (int step = 0; step < ITERATION_STEPS; step++)
596	{
597		const int workload = maxWorkload*step/ITERATION_STEPS;
598
599		for (int count = 0; count < ITERATION_SAMPLES; count++)
600			params.push_back(workload);
601	}
602
603	rng.shuffle(params.begin(), params.end());
604
605	// Render samples
606	for (size_t ndx = 0; ndx < params.size(); ndx++)
607	{
608		const int	workload	= params[ndx];
609		Sample		sample		= renderSample(*occluderData, *occludedData, workload);
610
611		sample.workload = workload;
612		sample.order = int(ndx);
613
614		results.push_back(sample);
615	}
616
617	logSamples(results, "Samples", "Samples");
618	logAnalysis(results);
619
620	m_results.setTestContextResult(m_testCtx);
621
622	return STOP;
623}
624
625void BaseCase::logSamples (const vector<Sample>& samples, const string& name, const string& desc)
626{
627	TestLog& log = m_testCtx.getLog();
628
629	bool testOnly = true;
630
631	for (size_t ndx = 0; ndx < samples.size(); ndx++)
632	{
633		if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0)
634		{
635			testOnly = false;
636			break;
637		}
638	}
639
640	log << TestLog::SampleList(name, desc);
641
642	if (testOnly)
643	{
644		log << TestLog::SampleInfo
645			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
646			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
647			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
648			<< TestLog::EndSampleInfo;
649
650		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
651		{
652			const Sample& sample = samples[sampleNdx];
653
654			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample;
655		}
656	}
657	else
658	{
659		log << TestLog::SampleInfo
660			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
661			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
662			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
663			<< TestLog::ValueInfo("NullTime",	"Read pixels time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
664			<< TestLog::ValueInfo("BaseTime",	"Base render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
665			<< TestLog::EndSampleInfo;
666
667		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
668		{
669			const Sample& sample = samples[sampleNdx];
670
671			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample;
672		}
673	}
674
675	log << TestLog::EndSampleList;
676}
677
678void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg)
679{
680	TestLog& log = m_testCtx.getLog();
681
682	log << TestLog::Section("Geometry", "Geometry");
683	log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage;
684	log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage;
685	log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage;
686
687	log << TestLog::Image("Test Geometry", "Test Geometry",  sample);
688	log << TestLog::EndSection;
689
690	log << TestLog::Section("Occluder", "Occluder");
691	log << occluderProg;
692	log << TestLog::EndSection;
693
694	log << TestLog::Section("Occluded", "Occluded");
695	log << occludedProg;
696	log << TestLog::EndSection;
697}
698
699void BaseCase::render (const RenderData& data) const
700{
701	const glw::Functions& gl = m_renderCtx.getFunctions();
702
703	gl.useProgram(data.m_program.getProgram());
704
705	gl.bindVertexArray(*data.m_vao);
706	gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices);
707	gl.bindVertexArray(0);
708}
709
710void BaseCase::render (const RenderData& data, int instances) const
711{
712	const glw::Functions& gl = m_renderCtx.getFunctions();
713
714	gl.useProgram(data.m_program.getProgram());
715
716	gl.bindVertexArray(*data.m_vao);
717	gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances);
718	gl.bindVertexArray(0);
719}
720
721// Render occluder once, then repeatedly render occluded geometry. Sample with multiple repetition counts & establish time per call with linear regression
722class RenderCountCase : public BaseCase
723{
724public:
725					RenderCountCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
726					~RenderCountCase	(void) {}
727
728protected:
729	virtual void	logAnalysis			(const vector<Sample>& samples);
730
731private:
732	virtual int		calibrate			(void) const;
733	virtual Sample	renderSample		(const RenderData& occluder, const RenderData& occluded, int callcount) const;
734};
735
736RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
737	: BaseCase	(testCtx, renderCtx, name, desc)
738{
739}
740
741void RenderCountCase::logAnalysis (const vector<Sample>& samples)
742{
743	using namespace gls;
744
745	TestLog&		log			= m_testCtx.getLog();
746	int				maxWorkload	= 0;
747	vector<Vec2>	testSamples	(samples.size());
748
749	for (size_t ndx = 0; ndx < samples.size(); ndx++)
750	{
751		const Sample& sample = samples[ndx];
752
753		testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
754
755		maxWorkload = de::max(maxWorkload, sample.workload);
756	}
757
758	{
759		const float							confidence	= 0.60f;
760		const LineParametersWithConfidence	testParam	= theilSenSiegelLinearRegression(testSamples, confidence);
761		const float							usPerCall	= testParam.coefficient;
762		const float							pxPerCall	= RENDER_SIZE*RENDER_SIZE;
763		const float							pxPerUs		= pxPerCall/usPerCall;
764		const float							mpxPerS		= pxPerUs;
765
766		log << TestLog::Section("Linear Regression", "Linear Regression");
767		log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
768		log << TestLog::Message << "Render time for scene with depth test was\n\t"
769			<< "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
770			<< "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
771			<< "us/workload" << TestLog::EndMessage;
772		log << TestLog::EndSection;
773
774		log << TestLog::Section("Result", "Result");
775
776		if (testParam.coefficientConfidenceLower < 0.0f)
777		{
778			log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage;
779			m_results.addResult(QP_TEST_RESULT_PASS, "Pass");
780		}
781		else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
782		{
783			log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
784			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
785		}
786		else
787		{
788			log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage;
789			m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2));
790		}
791
792		log << TestLog::EndSection;
793	}
794}
795
796Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const
797{
798	const glw::Functions&	gl		= m_renderCtx.getFunctions();
799	Sample					sample;
800	deUint64				now		= 0;
801	deUint64				prev	= 0;
802	deUint8					buffer[4];
803
804	// Stabilize
805	{
806		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
807		gl.enable(GL_DEPTH_TEST);
808		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
809	}
810
811	prev = deGetMicroseconds();
812
813	gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
814	gl.enable(GL_DEPTH_TEST);
815
816	render(occluder);
817	render(occluded, callcount);
818
819	gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
820
821	now = deGetMicroseconds();
822
823	sample.testTime = now - prev;
824	sample.baseTime = 0;
825	sample.nullTime = 0;
826	sample.workload = callcount;
827
828	return sample;
829}
830
831int RenderCountCase::calibrate (void) const
832{
833	using namespace gls;
834
835	const glw::Functions&	gl					= m_renderCtx.getFunctions();
836	TestLog&				log					= m_testCtx.getLog();
837
838	const RenderData		occluderGeometry	(genOccluderGeometry(), m_renderCtx, log);
839	const RenderData		occludedGeometry	(genOccludedGeometry(), m_renderCtx, log);
840
841	TheilSenCalibrator		calibrator			(CalibratorParameters(20, // Initial workload
842																	  10, // Max iteration frames
843																	  20.0f, // Iteration shortcut threshold ms
844																	  20, // Max iterations
845																	  33.0f, // Target frame time
846																	  40.0f, // Frame time cap
847																	  1000.0f // Target measurement duration
848																	  ));
849
850	while (true)
851	{
852		switch(calibrator.getState())
853		{
854			case TheilSenCalibrator::STATE_FINISHED:
855				logCalibrationInfo(m_testCtx.getLog(), calibrator);
856				return calibrator.getCallCount();
857
858			case TheilSenCalibrator::STATE_MEASURE:
859			{
860				deUint8	buffer[4];
861				deInt64 now;
862				deInt64 prev;
863
864				prev = deGetMicroseconds();
865
866				gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
867				gl.disable(GL_DEPTH_TEST);
868
869				render(occluderGeometry);
870				render(occludedGeometry, calibrator.getCallCount());
871
872				gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
873
874				now = deGetMicroseconds();
875
876				calibrator.recordIteration(now - prev);
877				break;
878			}
879
880			case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
881				calibrator.recomputeParameters();
882				break;
883			default:
884				DE_ASSERT(false);
885				return 1;
886		}
887	}
888}
889
890// Compares time/workload gradients of same geometry with and without depth testing
891class RelativeChangeCase : public BaseCase
892{
893public:
894					RelativeChangeCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
895	virtual			~RelativeChangeCase	(void) {}
896
897protected:
898	Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const;
899
900	virtual void	logAnalysis			(const vector<Sample>& samples);
901
902private:
903	int				calibrate			(void) const;
904};
905
906RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
907	: BaseCase		(testCtx, renderCtx, name, desc)
908{
909}
910
911int RelativeChangeCase::calibrate (void) const
912{
913	using namespace gls;
914
915	const glw::Functions&	gl		= m_renderCtx.getFunctions();
916	TestLog&				log		= m_testCtx.getLog();
917
918	const RenderData		geom	(genOccludedGeometry(), m_renderCtx, log);
919
920	TheilSenCalibrator calibrator(CalibratorParameters( 20, // Initial workload
921														10, // Max iteration frames
922														20.0f, // Iteration shortcut threshold ms
923														20, // Max iterations
924														33.0f, // Target frame time
925														40.0f, // Frame time cap
926														1000.0f // Target measurement duration
927														));
928
929	while (true)
930	{
931		switch(calibrator.getState())
932		{
933			case TheilSenCalibrator::STATE_FINISHED:
934				logCalibrationInfo(m_testCtx.getLog(), calibrator);
935				return calibrator.getCallCount();
936
937			case TheilSenCalibrator::STATE_MEASURE:
938			{
939				deUint8			buffer[4];
940				const GLuint	program	= geom.m_program.getProgram();
941
942				gl.useProgram(program);
943				gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount());
944
945				const deInt64 prev = deGetMicroseconds();
946
947				gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
948				gl.disable(GL_DEPTH_TEST);
949
950				render(geom);
951
952				gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
953
954				const deInt64 now = deGetMicroseconds();
955
956				calibrator.recordIteration(now - prev);
957				break;
958			}
959
960			case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
961				calibrator.recomputeParameters();
962				break;
963			default:
964				DE_ASSERT(false);
965				return 1;
966		}
967	}
968}
969
970Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const
971{
972	const glw::Functions&	gl		= m_renderCtx.getFunctions();
973	const GLuint			program	= occluded.m_program.getProgram();
974	Sample					sample;
975	deUint64				now		= 0;
976	deUint64				prev	= 0;
977	deUint8					buffer[4];
978
979	gl.useProgram(program);
980	gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
981
982	// Warmup (this workload seems to reduce variation in following workloads)
983	{
984		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
985		gl.disable(GL_DEPTH_TEST);
986
987		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
988	}
989
990	// Null time
991	{
992		prev = deGetMicroseconds();
993
994		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
995		gl.disable(GL_DEPTH_TEST);
996
997		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
998
999		now = deGetMicroseconds();
1000
1001		sample.nullTime = now - prev;
1002	}
1003
1004	// Test time
1005	{
1006		prev = deGetMicroseconds();
1007
1008		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1009		gl.enable(GL_DEPTH_TEST);
1010
1011		render(occluder);
1012		render(occluded);
1013
1014		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1015
1016		now = deGetMicroseconds();
1017
1018		sample.testTime = now - prev;
1019	}
1020
1021	// Base time
1022	{
1023		prev = deGetMicroseconds();
1024
1025		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1026		gl.disable(GL_DEPTH_TEST);
1027
1028		render(occluder);
1029		render(occluded);
1030
1031		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1032
1033		now = deGetMicroseconds();
1034
1035		sample.baseTime = now - prev;
1036	}
1037
1038	sample.workload = 0;
1039
1040	return sample;
1041}
1042
1043void RelativeChangeCase::logAnalysis (const vector<Sample>& samples)
1044{
1045	using namespace gls;
1046
1047	TestLog&		log			= m_testCtx.getLog();
1048
1049	int				maxWorkload	= 0;
1050
1051	vector<Vec2>	nullSamples	(samples.size());
1052	vector<Vec2>	baseSamples	(samples.size());
1053	vector<Vec2>	testSamples	(samples.size());
1054
1055	for (size_t ndx = 0; ndx < samples.size(); ndx++)
1056	{
1057		const Sample& sample = samples[ndx];
1058
1059		nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime);
1060		baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime);
1061		testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
1062
1063		maxWorkload = de::max(maxWorkload, sample.workload);
1064	}
1065
1066	{
1067		const float							confidence	= 0.60f;
1068
1069		const LineParametersWithConfidence	nullParam	= theilSenSiegelLinearRegression(nullSamples, confidence);
1070		const LineParametersWithConfidence	baseParam	= theilSenSiegelLinearRegression(baseSamples, confidence);
1071		const LineParametersWithConfidence	testParam	= theilSenSiegelLinearRegression(testSamples, confidence);
1072
1073		if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper))
1074		{
1075			m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant");
1076			log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage;
1077		}
1078
1079		if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper))
1080		{
1081			m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration");
1082			log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage;
1083		}
1084
1085		log << TestLog::Section("Linear Regression", "Linear Regression");
1086		log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
1087
1088		log << TestLog::Message << "Render time for empty scene was\n\t"
1089			<< "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset <<  ", " << nullParam.offsetConfidenceUpper << "]us +"
1090			<< "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]"
1091			<< "us/workload" << TestLog::EndMessage;
1092
1093		log << TestLog::Message << "Render time for scene without depth test was\n\t"
1094			<< "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset <<  ", " << baseParam.offsetConfidenceUpper << "]us +"
1095			<< "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]"
1096			<< "us/workload" << TestLog::EndMessage;
1097
1098		log << TestLog::Message << "Render time for scene with depth test was\n\t"
1099			<< "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
1100			<< "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
1101			<< "us/workload" << TestLog::EndMessage;
1102
1103		log << TestLog::EndSection;
1104
1105		if (de::inRange(0.0f, testParam.coefficientConfidenceLower, testParam.coefficientConfidenceUpper))
1106		{
1107			log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage;
1108			m_results.addResult(QP_TEST_RESULT_PASS, "0.0");
1109		}
1110		else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
1111		{
1112			log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
1113			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1114		}
1115		else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25)
1116		{
1117			log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage;
1118			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1119		}
1120		else
1121		{
1122			log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage;
1123			m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2));
1124		}
1125	}
1126}
1127
1128// Speed of trivial culling
1129class BaseCostCase : public RenderCountCase
1130{
1131public:
1132						BaseCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1133							: RenderCountCase (testCtx, renderCtx, name, desc) {}
1134
1135						~BaseCostCase		(void) {}
1136
1137private:
1138	virtual ObjectData	genOccluderGeometry	(void) const { return Utils::fastQuad(0.2f); }
1139	virtual ObjectData	genOccludedGeometry	(void) const { return Utils::variableQuad(0.8f); }
1140
1141	virtual void		logDescription		(void)
1142	{
1143		TestLog& log = m_testCtx.getLog();
1144
1145		log << TestLog::Section("Description", "Test description");
1146		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1147		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1148		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1149		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1150		log << TestLog::EndSection;
1151	}
1152};
1153
1154// Gradient
1155class GradientCostCase : public RenderCountCase
1156{
1157public:
1158						GradientCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance)
1159							: RenderCountCase		(testCtx, renderCtx, name, desc)
1160							, m_gradientDistance	(gradientDistance)
1161						{
1162						}
1163
1164						~GradientCostCase	(void) {}
1165
1166private:
1167	virtual ObjectData	genOccluderGeometry	(void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); }
1168	virtual ObjectData	genOccludedGeometry	(void) const
1169	{
1170		return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f));
1171	}
1172
1173	virtual void		logDescription		(void)
1174	{
1175		TestLog& log = m_testCtx.getLog();
1176
1177		log << TestLog::Section("Description", "Test description");
1178		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1179		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1180		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1181		log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." << TestLog::EndMessage;
1182		log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage;
1183		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1184		log << TestLog::EndSection;
1185	}
1186
1187	const float			m_gradientDistance;
1188};
1189
1190// Constant offset to frag depth in occluder
1191class OccluderStaticFragDepthCostCase : public RenderCountCase
1192{
1193public:
1194						OccluderStaticFragDepthCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1195							: RenderCountCase(testCtx, renderCtx, name, desc)
1196						{
1197						}
1198
1199						~OccluderStaticFragDepthCostCase	(void) {}
1200
1201private:
1202	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1203	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::fastQuad(0.8f); }
1204
1205	virtual void		logDescription						(void)
1206	{
1207		TestLog& log = m_testCtx.getLog();
1208
1209		log << TestLog::Section("Description", "Test description");
1210		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1211		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1212		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1213		log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1214		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1215		log << TestLog::EndSection;
1216	}
1217};
1218
1219// Dynamic offset to frag depth in occluder
1220class OccluderDynamicFragDepthCostCase : public RenderCountCase
1221{
1222public:
1223						OccluderDynamicFragDepthCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1224							: RenderCountCase(testCtx, renderCtx, name, desc)
1225						{
1226						}
1227
1228						~OccluderDynamicFragDepthCostCase	(void) {}
1229
1230private:
1231	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1232	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::fastQuad(0.8f); }
1233
1234	virtual void		logDescription						(void)
1235	{
1236		TestLog& log = m_testCtx.getLog();
1237
1238		log << TestLog::Section("Description", "Test description");
1239		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1240		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1241		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1242		log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1243		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1244		log << TestLog::EndSection;
1245	}
1246};
1247
1248// Constant offset to frag depth in occluder
1249class OccludedStaticFragDepthCostCase : public RenderCountCase
1250{
1251public:
1252						OccludedStaticFragDepthCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1253							: RenderCountCase(testCtx, renderCtx, name, desc)
1254						{
1255						}
1256
1257						~OccludedStaticFragDepthCostCase	(void) {}
1258
1259private:
1260	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1261	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1262
1263	virtual void		logDescription						(void)
1264	{
1265		TestLog& log = m_testCtx.getLog();
1266
1267		log << TestLog::Section("Description", "Test description");
1268		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1269		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1270		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1271		log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1272		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1273		log << TestLog::EndSection;
1274	}
1275};
1276
1277// Dynamic offset to frag depth in occluder
1278class OccludedDynamicFragDepthCostCase : public RenderCountCase
1279{
1280public:
1281						OccludedDynamicFragDepthCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1282							: RenderCountCase(testCtx, renderCtx, name, desc)
1283						{
1284						}
1285
1286						~OccludedDynamicFragDepthCostCase	(void) {}
1287
1288private:
1289	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1290	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1291
1292	virtual void		logDescription						(void)
1293	{
1294		TestLog& log = m_testCtx.getLog();
1295
1296		log << TestLog::Section("Description", "Test description");
1297		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1298		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1299		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1300		log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1301		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1302		log << TestLog::EndSection;
1303	}
1304};
1305
1306// Culling speed with slightly less trivial geometry
1307class OccludingGeometryComplexityCostCase : public RenderCountCase
1308{
1309public:
1310						OccludingGeometryComplexityCostCase		(TestContext&			testCtx,
1311																 const RenderContext&	renderCtx,
1312																 const char*			name,
1313																 const char*			desc,
1314																 int					resolution,
1315																 float					xyNoise,
1316																 float					zNoise)
1317							: RenderCountCase	(testCtx, renderCtx, name, desc)
1318							, m_resolution		(resolution)
1319							, m_xyNoise			(xyNoise)
1320							, m_zNoise			(zNoise)
1321						{
1322						}
1323
1324						~OccludingGeometryComplexityCostCase	(void) {}
1325
1326private:
1327	virtual ObjectData	genOccluderGeometry						(void) const
1328	{
1329		return ObjectData(Utils::getBaseShader(),
1330						  Utils::getFullScreenGrid(m_resolution,
1331						  deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(),
1332						  0.2f,
1333						  m_zNoise,
1334						  m_xyNoise));
1335	}
1336
1337	virtual ObjectData	genOccludedGeometry						(void) const { return Utils::variableQuad(0.8f); }
1338
1339	virtual void		logDescription		(void)
1340	{
1341		TestLog& log = m_testCtx.getLog();
1342
1343		log << TestLog::Section("Description", "Test description");
1344		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1345		log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage;
1346		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1347		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1348		log << TestLog::EndSection;
1349	}
1350
1351	const int			m_resolution;
1352	const float			m_xyNoise;
1353	const float			m_zNoise;
1354};
1355
1356
1357// Cases with varying workloads in the fragment shader
1358class FragmentWorkloadCullCase : public RelativeChangeCase
1359{
1360public:
1361						FragmentWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1362	virtual				~FragmentWorkloadCullCase	(void) {}
1363
1364private:
1365	virtual ObjectData	genOccluderGeometry			(void) const { return Utils::fastQuad(0.2f); }
1366
1367	virtual void		logDescription				(void);
1368};
1369
1370FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1371	: RelativeChangeCase	(testCtx, renderCtx, name, desc)
1372{
1373}
1374
1375void FragmentWorkloadCullCase::logDescription (void)
1376{
1377	TestLog& log = m_testCtx.getLog();
1378
1379	log << TestLog::Section("Description", "Test description");
1380	log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1381	log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1382		"the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1383	log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1384	log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1385	log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1386	log << TestLog::EndSection;
1387}
1388
1389// Additional workload consists of texture lookups
1390class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase
1391{
1392public:
1393						FragmentTextureWorkloadCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1394	virtual				~FragmentTextureWorkloadCullCase	(void) {}
1395
1396	virtual void		init								(void);
1397	virtual void		deinit								(void);
1398
1399private:
1400	typedef MovePtr<glu::Texture> TexPtr;
1401
1402	virtual ObjectData	genOccludedGeometry					(void) const
1403	{
1404		return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1405	}
1406
1407	TexPtr				m_texture;
1408};
1409
1410FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1411	: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1412{
1413}
1414
1415void FragmentTextureWorkloadCullCase::init (void)
1416{
1417	const glw::Functions&	gl		= m_renderCtx.getFunctions();
1418	const int				size	= 128;
1419	const vector<deUint8>	data	(size*size*4, 255);
1420
1421	m_texture = MovePtr<glu::Texture>(new glu::Texture(gl));
1422
1423	gl.bindTexture(GL_TEXTURE_2D, m_texture);
1424	gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
1425	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1426	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1427}
1428
1429void FragmentTextureWorkloadCullCase::deinit (void)
1430{
1431	m_texture.clear();
1432}
1433
1434// Additional workload consists of arithmetic
1435class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1436{
1437public:
1438						FragmentArithmeticWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1439						: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1440					{
1441					}
1442	virtual				~FragmentArithmeticWorkloadCullCase	(void) {}
1443
1444private:
1445	virtual ObjectData	genOccludedGeometry					(void) const
1446	{
1447		return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1448	}
1449};
1450
1451// Contains dynamicly unused discard after a series of calculations
1452class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1453{
1454public:
1455						FragmentDiscardArithmeticWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1456						: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1457					{
1458					}
1459
1460	virtual				~FragmentDiscardArithmeticWorkloadCullCase	(void) {}
1461
1462private:
1463	virtual ObjectData	genOccludedGeometry							(void) const
1464	{
1465		return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f));
1466	}
1467
1468	virtual void		logDescription								(void)
1469	{
1470		TestLog& log = m_testCtx.getLog();
1471
1472		log << TestLog::Section("Description", "Test description");
1473		log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1474		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1475			"the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage;
1476		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1477		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1478		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1479		log << TestLog::EndSection;
1480	}
1481};
1482
1483// Discards fragments from the occluder in a grid pattern
1484class PartialOccluderDiscardCullCase : public RelativeChangeCase
1485{
1486public:
1487						PartialOccluderDiscardCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize)
1488							: RelativeChangeCase		(testCtx, renderCtx, name, desc)
1489							, m_gridsize	(gridsize)
1490						{
1491						}
1492	virtual				~PartialOccluderDiscardCullCase	(void) {}
1493
1494private:
1495	virtual ObjectData	genOccluderGeometry				(void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); }
1496	virtual ObjectData	genOccludedGeometry				(void) const { return Utils::slowQuad(0.8f); }
1497
1498	virtual void		logDescription					(void)
1499	{
1500		TestLog& log = m_testCtx.getLog();
1501
1502		log << TestLog::Section("Description", "Test description");
1503		log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage;
1504		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad discards half the "
1505			"fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage;
1506		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1507		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1508		log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time"  << TestLog::EndMessage;
1509		log << TestLog::EndSection;
1510	}
1511
1512	const int			m_gridsize;
1513};
1514
1515// Trivial occluder covering part of screen
1516class PartialOccluderCullCase : public RelativeChangeCase
1517{
1518public:
1519						PartialOccluderCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage)
1520							: RelativeChangeCase		(testCtx, renderCtx, name, desc)
1521							, m_coverage	(coverage)
1522						{
1523						}
1524						~PartialOccluderCullCase	(void) {}
1525
1526private:
1527	virtual ObjectData	genOccluderGeometry			(void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); }
1528	virtual ObjectData	genOccludedGeometry			(void) const {return Utils::slowQuad(0.8f); }
1529
1530	virtual void		logDescription				(void)
1531	{
1532		TestLog& log = m_testCtx.getLog();
1533
1534		log << TestLog::Section("Description", "Test description");
1535		log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage;
1536		log << TestLog::Message << "Geometry consists of two quads. The first (occluding) quad covers " << m_coverage*100.0f
1537			<< "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage;
1538		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1539		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1540		log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area"  << TestLog::EndMessage;
1541		log << TestLog::EndSection;
1542	}
1543
1544	const float			m_coverage;
1545};
1546
1547// Constant offset to frag depth in occluder
1548class StaticOccluderFragDepthCullCase : public RelativeChangeCase
1549{
1550public:
1551						StaticOccluderFragDepthCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1552							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1553						{
1554						}
1555
1556						~StaticOccluderFragDepthCullCase	(void) {}
1557
1558private:
1559	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1560	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::slowQuad(0.8f); }
1561
1562	virtual void		logDescription						(void)
1563	{
1564		TestLog& log = m_testCtx.getLog();
1565
1566		log << TestLog::Section("Description", "Test description");
1567		log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1568		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1569		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1570		log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1571		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1572		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1573		log << TestLog::EndSection;
1574	}
1575};
1576
1577// Dynamic offset to frag depth in occluder
1578class DynamicOccluderFragDepthCullCase : public RelativeChangeCase
1579{
1580public:
1581						DynamicOccluderFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1582							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1583						{
1584						}
1585
1586						~DynamicOccluderFragDepthCullCase	(void) {}
1587
1588private:
1589	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1590	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::slowQuad(0.8f); }
1591
1592	virtual void		logDescription						(void)
1593	{
1594		TestLog& log = m_testCtx.getLog();
1595
1596		log << TestLog::Section("Description", "Test description");
1597		log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1598		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1599		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1600		log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1601		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1602		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1603		log << TestLog::EndSection;
1604	}
1605};
1606
1607// Constant offset to frag depth in occluded
1608class StaticOccludedFragDepthCullCase : public RelativeChangeCase
1609{
1610public:
1611						StaticOccludedFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1612							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1613						{
1614						}
1615
1616						~StaticOccludedFragDepthCullCase	(void) {}
1617
1618private:
1619	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1620	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1621
1622	virtual void		logDescription						(void)
1623	{
1624		TestLog& log = m_testCtx.getLog();
1625
1626		log << TestLog::Section("Description", "Test description");
1627		log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1628		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1629		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1630		log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1631		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1632		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1633		log << TestLog::EndSection;
1634	}
1635};
1636
1637// Dynamic offset to frag depth in occluded
1638class DynamicOccludedFragDepthCullCase : public RelativeChangeCase
1639{
1640public:
1641						DynamicOccludedFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1642							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1643						{
1644						}
1645
1646						~DynamicOccludedFragDepthCullCase	(void) {}
1647
1648private:
1649	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1650	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1651
1652	virtual void		logDescription						(void)
1653	{
1654		TestLog& log = m_testCtx.getLog();
1655
1656		log << TestLog::Section("Description", "Test description");
1657		log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1658		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1659		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1660		log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1661		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1662		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1663		log << TestLog::EndSection;
1664	}
1665};
1666
1667// Dynamic offset to frag depth in occluded
1668class ReversedDepthOrderCullCase : public RelativeChangeCase
1669{
1670public:
1671						ReversedDepthOrderCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1672							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1673						{
1674						}
1675
1676						~ReversedDepthOrderCullCase	(void) {}
1677
1678private:
1679	virtual ObjectData	genOccluderGeometry			(void) const { return Utils::fastQuad(0.2f); }
1680	virtual ObjectData	genOccludedGeometry			(void) const { return Utils::slowQuad(0.8f); }
1681
1682	virtual void		logDescription				(void)
1683	{
1684		TestLog& log = m_testCtx.getLog();
1685
1686		log << TestLog::Section("Description", "Test description");
1687		log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage;
1688		log << TestLog::Message << "Geometry consists of two fullscreen quads. The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1689		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1690		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1691		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1692		log << TestLog::EndSection;
1693	}
1694
1695	// Rendering order of occluder & occluded is reversed, otherwise identical to parent version
1696	Sample				renderSample				(const RenderData& occluder, const RenderData& occluded, int workload) const
1697	{
1698		const glw::Functions&	gl		= m_renderCtx.getFunctions();
1699		const GLuint			program	= occluded.m_program.getProgram();
1700		Sample					sample;
1701		deUint64				now		= 0;
1702		deUint64				prev	= 0;
1703		deUint8					buffer[4];
1704
1705		gl.useProgram(program);
1706		gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
1707
1708		// Warmup (this workload seems to reduce variation in following workloads)
1709		{
1710			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1711			gl.disable(GL_DEPTH_TEST);
1712
1713			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1714		}
1715
1716		// Null time
1717		{
1718			prev = deGetMicroseconds();
1719
1720			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1721			gl.disable(GL_DEPTH_TEST);
1722
1723			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1724
1725			now = deGetMicroseconds();
1726
1727			sample.nullTime = now - prev;
1728		}
1729
1730		// Test time
1731		{
1732			prev = deGetMicroseconds();
1733
1734			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1735			gl.enable(GL_DEPTH_TEST);
1736
1737			render(occluded);
1738			render(occluder);
1739
1740			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1741
1742			now = deGetMicroseconds();
1743
1744			sample.testTime = now - prev;
1745		}
1746
1747		// Base time
1748		{
1749			prev = deGetMicroseconds();
1750
1751			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1752			gl.disable(GL_DEPTH_TEST);
1753
1754			render(occluded);
1755			render(occluder);
1756
1757			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1758
1759			now = deGetMicroseconds();
1760
1761			sample.baseTime = now - prev;
1762		}
1763
1764		sample.workload = 0;
1765
1766		return sample;
1767	}
1768};
1769
1770} // Anonymous
1771
1772DepthTests::DepthTests (Context& context)
1773	: TestCaseGroup (context, "depth", "Depth culling performance")
1774{
1775}
1776
1777void DepthTests::init (void)
1778{
1779	TestContext&			testCtx		= m_context.getTestContext();
1780	const RenderContext&	renderCtx	= m_context.getRenderContext();
1781
1782	{
1783		tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency");
1784
1785		addChild(cullEfficiencyGroup);
1786
1787		{
1788			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload");
1789
1790			cullEfficiencyGroup->addChild(group);
1791
1792			group->addChild(new FragmentTextureWorkloadCullCase(			testCtx, renderCtx, "workload_texture",				"Fragment shader with texture lookup workload"));
1793			group->addChild(new FragmentArithmeticWorkloadCullCase(			testCtx, renderCtx, "workload_arithmetic",			"Fragment shader with arithmetic workload"));
1794			group->addChild(new FragmentDiscardArithmeticWorkloadCullCase(	testCtx, renderCtx, "workload_arithmetic_discard",	"Fragment shader that may discard with arithmetic workload"));
1795		}
1796
1797		{
1798			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard");
1799
1800			cullEfficiencyGroup->addChild(group);
1801
1802			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256",	"Parts of occluder geometry discarded", 256));
1803			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128",	"Parts of occluder geometry discarded", 128));
1804			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64",	"Parts of occluder geometry discarded", 64));
1805			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32",	"Parts of occluder geometry discarded", 32));
1806			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16",	"Parts of occluder geometry discarded", 16));
1807			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8",	"Parts of occluder geometry discarded", 8));
1808			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4",	"Parts of occluder geometry discarded", 4));
1809			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2",	"Parts of occluder geometry discarded", 2));
1810			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1",	"Parts of occluder geometry discarded", 1));
1811		}
1812
1813		{
1814			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage");
1815
1816			cullEfficiencyGroup->addChild(group);
1817
1818			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f));
1819			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f));
1820			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f));
1821			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f));
1822			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering only part of occluded geometry", 0.80f));
1823			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f));
1824			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f));
1825			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f));
1826			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f));
1827		}
1828
1829		{
1830			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage");
1831
1832			cullEfficiencyGroup->addChild(group);
1833
1834			group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", ""));
1835			group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", ""));
1836			group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", ""));
1837			group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", ""));
1838		}
1839
1840		{
1841			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order");
1842
1843			cullEfficiencyGroup->addChild(group);
1844
1845			group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order"));
1846		}
1847	}
1848
1849	{
1850		tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency");
1851
1852		addChild(testCostGroup);
1853
1854		{
1855			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences");
1856
1857			testCostGroup->addChild(group);
1858
1859			group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", ""));
1860			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_050", "", 0.50f));
1861			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f));
1862			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f));
1863			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f));
1864			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f));
1865		}
1866
1867		{
1868			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity");
1869
1870			testCostGroup->addChild(group);
1871
1872			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5",   "", 5,   0.0f, 0.0f));
1873			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15",  "", 15,  0.0f, 0.0f));
1874			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25",  "", 25,  0.0f, 0.0f));
1875			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50",  "", 50,  0.0f, 0.0f));
1876			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f));
1877
1878			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.0f));
1879			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.0f));
1880			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.0f));
1881			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.0f));
1882			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f));
1883
1884			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_5",   "", 5,   0.0f, 0.2f));
1885			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15",  "", 15,  0.0f, 0.2f));
1886			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25",  "", 25,  0.0f, 0.2f));
1887			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50",  "", 50,  0.0f, 0.2f));
1888			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f));
1889
1890			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.2f));
1891			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.2f));
1892			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.2f));
1893			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.2f));
1894			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f));
1895		}
1896
1897		{
1898			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth");
1899
1900			testCostGroup->addChild(group);
1901
1902			group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", ""));
1903			group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", ""));
1904			group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", ""));
1905			group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", ""));
1906		}
1907	}
1908}
1909
1910} // Performance
1911} // gles3
1912} // deqp
1913