1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Depth buffer performance tests.
22 *//*--------------------------------------------------------------------*/
23
24#include "es3pDepthTests.hpp"
25
26#include "glsCalibration.hpp"
27
28#include "gluShaderProgram.hpp"
29#include "gluObjectWrapper.hpp"
30#include "gluPixelTransfer.hpp"
31
32#include "glwFunctions.hpp"
33#include "glwEnums.hpp"
34
35#include "tcuTestLog.hpp"
36#include "tcuStringTemplate.hpp"
37#include "tcuCPUWarmup.hpp"
38#include "tcuCommandLine.hpp"
39
40#include "deClock.h"
41#include "deString.h"
42#include "deMath.h"
43#include "deStringUtil.hpp"
44#include "deRandom.hpp"
45#include "deUniquePtr.hpp"
46
47#include <vector>
48#include <algorithm>
49
50namespace deqp
51{
52namespace gles3
53{
54namespace Performance
55{
56namespace
57{
58using namespace glw;
59using de::MovePtr;
60using tcu::TestContext;
61using tcu::TestLog;
62using tcu::Vec4;
63using tcu::Vec3;
64using tcu::Vec2;
65using glu::RenderContext;
66using glu::ProgramSources;
67using glu::ShaderSource;
68using std::vector;
69using std::string;
70using std::map;
71
// One timing measurement. Times are logged by BaseCase::logSamples with unit "us".
struct Sample
{
	deInt64	nullTime;	// Logged as "Read pixels time"; left at 0 by cases that do not measure it
	deInt64	baseTime;	// Logged as "Base render time"; left at 0 by cases that do not measure it
	deInt64	testTime;	// Logged as "Test render time"
	int		order;		// Position of the sample in the (shuffled) measurement sequence, set by BaseCase::iterate
	int		workload;	// Workload value the sample was rendered with
};
80
// Plain (step, measurement) index pair. Not referenced by the visible part of this file.
struct SampleParams
{
	int step;
	int measurement;

	SampleParams (int step_, int measurement_)
		: step			(step_)
		, measurement	(measurement_)
	{
	}
};
88
// Flat vertex array. RenderData consumes it as four floats per vertex.
typedef vector<float> Geometry;
90
91struct ObjectData
92{
93	ProgramSources	shader;
94	Geometry		geometry;
95
96	ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {}
97};
98
99class RenderData
100{
101public:
102								RenderData		(const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log);
103								~RenderData		(void) {};
104
105	const glu::ShaderProgram	m_program;
106	const glu::VertexArray		m_vao;
107	const glu::Buffer			m_vbo;
108
109	const int					m_numVertices;
110};
111
112RenderData::RenderData (const ObjectData& object, const  glu::RenderContext& renderCtx, TestLog& log)
113	: m_program		(renderCtx, object.shader)
114	, m_vao			(renderCtx.getFunctions())
115	, m_vbo			(renderCtx.getFunctions())
116	, m_numVertices	(int(object.geometry.size())/4)
117{
118	const glw::Functions& gl = renderCtx.getFunctions();
119
120	if (!m_program.isOk())
121		log << m_program;
122
123	gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo);
124	gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW);
125	gl.bindAttribLocation(m_program.getProgram(), 0, "a_position");
126
127	gl.bindVertexArray(*m_vao);
128	gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
129	gl.enableVertexAttribArray(0);
130	gl.bindVertexArray(0);
131}
132
133namespace Utils
134{
135	vector<float> getFullscreenQuad (float depth)
136	{
137		const float data[] =
138		{
139			+1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own
140			+1.0f, -1.0f, depth, 1.0f,
141			-1.0f, -1.0f, depth, 2.0f,
142			-1.0f, -1.0f, depth, 0.0f,
143			-1.0f, +1.0f, depth, 1.0f,
144			+1.0f, +1.0f, depth, 2.0f,
145		};
146
147		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
148	}
149
150	vector<float> getFullscreenQuadWithGradient (float depth0, float depth1)
151	{
152		const float data[] =
153		{
154			+1.0f, +1.0f, depth0, 0.0f,
155			+1.0f, -1.0f, depth0, 1.0f,
156			-1.0f, -1.0f, depth1, 2.0f,
157			-1.0f, -1.0f, depth1, 0.0f,
158			-1.0f, +1.0f, depth1, 1.0f,
159			+1.0f, +1.0f, depth0, 2.0f,
160		};
161
162		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
163	}
164
165	vector<float> getPartScreenQuad (float coverage, float depth)
166	{
167		const float xMax	= -1.0f + 2.0f*coverage;
168		const float data[]	=
169		{
170			 xMax, +1.0f, depth, 0.0f,
171			 xMax, -1.0f, depth, 1.0f,
172			-1.0f, -1.0f, depth, 2.0f,
173			-1.0f, -1.0f, depth, 0.0f,
174			-1.0f, +1.0f, depth, 1.0f,
175			 xMax, +1.0f, depth, 2.0f,
176		};
177
178		return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data));
179	}
180
181	// Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise
182	vector<float> getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise)
183	{
184		const int		gridsize	= resolution+1;
185		vector<Vec3>	vertices	(gridsize*gridsize);
186		vector<float>	retval;
187		de::Random		rng			(seed);
188
189		for (int y = 0; y < gridsize; y++)
190		for (int x = 0; x < gridsize; x++)
191		{
192			const bool	isEdge	= x == 0 || y == 0 || x == resolution || y == resolution;
193			const float x_		= float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
194			const float y_		= float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise));
195			const float z_		= baseDepth + rng.getFloat(-depthNoise, +depthNoise);
196
197			vertices[y*gridsize + x] = Vec3(x_, y_, z_);
198		}
199
200		retval.reserve(resolution*resolution*6);
201
202		for (int y = 0; y < resolution; y++)
203		for (int x = 0; x < resolution; x++)
204		{
205			const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)];
206			const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)];
207			const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)];
208			const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)];
209
210			const float temp[6*4] =
211			{
212				p0.x(), p0.y(), p0.z(), 0.0f,
213				p2.x(), p2.y(), p2.z(), 1.0f,
214				p1.x(), p1.y(), p1.z(), 2.0f,
215
216				p3.x(), p3.y(), p3.z(), 0.0f,
217				p1.x(), p1.y(), p1.z(), 1.0f,
218				p2.x(), p2.y(), p2.z(), 2.0f,
219			};
220
221			retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp));
222		}
223
224		return retval;
225	}
226
227	// Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader
228	string getBaseVertexShader (void)
229	{
230		return "#version 300 es\n"
231				"in highp vec4 a_position;\n"
232				"out mediump vec3 v_bcoords;\n"
233				"void main()\n"
234				"{\n"
235				"	v_bcoords = vec3(0, 0, 0);\n"
236				"	v_bcoords[int(a_position.w)] = 1.0;\n"
237				"	gl_Position = vec4(a_position.xyz, 1.0);\n"
238				"}\n";
239	}
240
241	// Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords
242	string getInstanceNoiseVertexShader (void)
243	{
244		return "#version 300 es\n"
245				"in highp vec4 a_position;\n"
246				"out mediump vec3 v_bcoords;\n"
247				"void main()\n"
248				"{\n"
249				"	v_bcoords = vec3(0, 0, 0);\n"
250				"	v_bcoords[int(a_position.w)] = 1.0;\n"
251				"	vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n"
252				"	gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n"
253				"}\n";
254	}
255
256	// Renders green triangles with edges highlighted. Exact shade depends on depth.
257	string getDepthAsGreenFragmentShader (void)
258	{
259		return	"#version 300 es\n"
260				"in mediump vec3 v_bcoords;\n"
261				"out mediump vec4 fragColor;\n"
262				"void main()\n"
263				"{\n"
264				"	mediump float d = gl_FragCoord.z;\n"
265				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
266				"		fragColor = vec4(d,1,d,1);\n"
267				"	else\n"
268				"		fragColor = vec4(0,d,0,1);\n"
269				"}\n";
270	}
271
272	// Renders green triangles with edges highlighted. Exact shade depends on depth.
273	string getDepthAsRedFragmentShader (void)
274	{
275		return	"#version 300 es\n"
276				"in mediump vec3 v_bcoords;\n"
277				"out mediump vec4 fragColor;\n"
278				"void main()\n"
279				"{\n"
280				"	mediump float d = gl_FragCoord.z;\n"
281				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
282				"		fragColor = vec4(1,d,d,1);\n"
283				"	else\n"
284				"		fragColor = vec4(d,0,0,1);\n"
285				"}\n";
286	}
287
288	// Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
289	string getArithmeticWorkloadFragmentShader (void)
290	{
291
292		return	"#version 300 es\n"
293				"in mediump vec3 v_bcoords;\n"
294				"out mediump vec4 fragColor;\n"
295				"uniform mediump int u_iterations;\n"
296				"void main()\n"
297				"{\n"
298				"	mediump float d = gl_FragCoord.z;\n"
299				"	for (int i = 0; i<u_iterations; i++)\n"
300				// cos(a)^2 + sin(a)^2 == 1. since d is in range [0,1] this will lose a few ULP's of precision per iteration but should not significantly change the value of d without extreme iteration counts
301				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
302				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
303				"		fragColor = vec4(1,d,d,1);\n"
304				"	else\n"
305				"		fragColor = vec4(d,0,0,1);\n"
306				"}\n";
307	}
308
309	// Arithmetic workload shader but contains discard
310	string getArithmeticWorkloadDiscardFragmentShader (void)
311	{
312		return	"#version 300 es\n"
313				"in mediump vec3 v_bcoords;\n"
314				"out mediump vec4 fragColor;\n"
315				"uniform mediump int u_iterations;\n"
316				"void main()\n"
317				"{\n"
318				"	mediump float d = gl_FragCoord.z;\n"
319				"	for (int i = 0; i<u_iterations; i++)\n"
320				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
321				"	if (d < 0.5) discard;\n"
322				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
323				"		fragColor = vec4(1,d,d,1);\n"
324				"	else\n"
325				"		fragColor = vec4(d,0,0,1);\n"
326				"}\n";
327	}
328
329	// Texture fetch based time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
330	string getTextureWorkloadFragmentShader (void)
331	{
332		return  "#version 300 es\n"
333				"in mediump vec3 v_bcoords;\n"
334				"out mediump vec4 fragColor;\n"
335				"uniform mediump int u_iterations;\n"
336				"uniform sampler2D u_texture;\n"
337				"void main()\n"
338				"{\n"
339				"	mediump float d = gl_FragCoord.z;\n"
340				"	for (int i = 0; i<u_iterations; i++)\n"
341				"		d *= texture(u_texture, (gl_FragCoord.xy+vec2(i))/512.0).r;\n" // Texture is expected to be fully white
342				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
343				"		fragColor = vec4(1,1,1,1);\n"
344				"	else\n"
345				"		fragColor = vec4(d,0,0,1);\n"
346				"}\n";
347	}
348
349	// Discard fragments in a grid pattern
350	string getGridDiscardFragmentShader (int gridsize)
351	{
352		const string		fragSrc = "#version 300 es\n"
353									  "in mediump vec3 v_bcoords;\n"
354									  "out mediump vec4 fragColor;\n"
355									  "void main()\n"
356									  "{\n"
357									  "	mediump float d = gl_FragCoord.z;\n"
358									  "	if ((int(gl_FragCoord.x)/${GRIDRENDER_SIZE} + int(gl_FragCoord.y)/${GRIDRENDER_SIZE})%2 == 0)\n"
359									  "		discard;\n"
360									  "	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
361									  "		fragColor = vec4(d,1,d,1);\n"
362									  "	else\n"
363									  "		fragColor = vec4(0,d,0,1);\n"
364									  "}\n";
365		map<string, string>	params;
366
367		params["GRIDRENDER_SIZE"] = de::toString(gridsize);
368
369		return tcu::StringTemplate(fragSrc).specialize(params);
370	}
371
372	// A static increment to frag depth
373	string getStaticFragDepthFragmentShader (void)
374	{
375		return	"#version 300 es\n"
376				"in mediump vec3 v_bcoords;\n"
377				"out mediump vec4 fragColor;\n"
378				"void main()\n"
379				"{\n"
380				"	mediump float d = gl_FragCoord.z;\n"
381				"	gl_FragDepth = gl_FragCoord.z + 0.1;\n"
382				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
383				"		fragColor = vec4(d,1,d,1);\n"
384				"	else\n"
385				"		fragColor = vec4(0,d,0,1);\n"
386				"}\n";
387	}
388
389	// A trivial dynamic change to frag depth
390	string getDynamicFragDepthFragmentShader (void)
391	{
392		return	"#version 300 es\n"
393				"in mediump vec3 v_bcoords;\n"
394				"out mediump vec4 fragColor;\n"
395				"void main()\n"
396				"{\n"
397				"	mediump float d = gl_FragCoord.z;\n"
398				"	gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
399				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
400				"		fragColor = vec4(d,1,d,1);\n"
401				"	else\n"
402				"		fragColor = vec4(0,d,0,1);\n"
403				"}\n";
404	}
405
406	// A static increment to frag depth
407	string getStaticFragDepthArithmeticWorkloadFragmentShader (void)
408	{
409		return	"#version 300 es\n"
410				"in mediump vec3 v_bcoords;\n"
411				"out mediump vec4 fragColor;\n"
412				"uniform mediump int u_iterations;\n"
413				"void main()\n"
414				"{\n"
415				"	mediump float d = gl_FragCoord.z;\n"
416				"	gl_FragDepth = gl_FragCoord.z + 0.1;\n"
417				"	for (int i = 0; i<u_iterations; i++)\n"
418				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
419				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
420				"		fragColor = vec4(1,d,d,1);\n"
421				"	else\n"
422				"		fragColor = vec4(d,0,0,1);\n"
423				"}\n";
424	}
425
426	// A trivial dynamic change to frag depth
427	string getDynamicFragDepthArithmeticWorkloadFragmentShader (void)
428	{
429		return	"#version 300 es\n"
430				"in mediump vec3 v_bcoords;\n"
431				"out mediump vec4 fragColor;\n"
432				"uniform mediump int u_iterations;\n"
433				"void main()\n"
434				"{\n"
435				"	mediump float d = gl_FragCoord.z;\n"
436				"	gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1
437				"	for (int i = 0; i<u_iterations; i++)\n"
438				"		d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
439				"	if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
440				"		fragColor = vec4(1,d,d,1);\n"
441				"	else\n"
442				"		fragColor = vec4(d,0,0,1);\n"
443				"}\n";
444	}
445
446	glu::ProgramSources getBaseShader (void)
447	{
448		return glu::makeVtxFragSources(getBaseVertexShader(), getDepthAsGreenFragmentShader());
449	}
450
451	glu::ProgramSources getArithmeticWorkloadShader (void)
452	{
453		return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadFragmentShader());
454	}
455
456	glu::ProgramSources getArithmeticWorkloadDiscardShader (void)
457	{
458		return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadDiscardFragmentShader());
459	}
460
461	glu::ProgramSources getTextureWorkloadShader (void)
462	{
463		return glu::makeVtxFragSources(getBaseVertexShader(), getTextureWorkloadFragmentShader());
464	}
465
466	glu::ProgramSources getGridDiscardShader (int gridsize)
467	{
468		return glu::makeVtxFragSources(getBaseVertexShader(), getGridDiscardFragmentShader(gridsize));
469	}
470
471	inline ObjectData quadWith (const glu::ProgramSources& shader, float depth)
472	{
473		return ObjectData(shader, getFullscreenQuad(depth));
474	}
475
476	inline ObjectData quadWith (const string& fragShader, float depth)
477	{
478		return ObjectData(glu::makeVtxFragSources(getBaseVertexShader(), fragShader), getFullscreenQuad(depth));
479	}
480
481	inline ObjectData variableQuad (float depth)
482	{
483		return ObjectData(glu::makeVtxFragSources(getInstanceNoiseVertexShader(), getDepthAsRedFragmentShader()), getFullscreenQuad(depth));
484	}
485
486	inline ObjectData fastQuad (float depth)
487	{
488		return ObjectData(getBaseShader(), getFullscreenQuad(depth));
489	}
490
491	inline ObjectData slowQuad (float depth)
492	{
493		return ObjectData(getArithmeticWorkloadShader(), getFullscreenQuad(depth));
494	}
495
496	inline ObjectData fastQuadWithGradient (float depth0, float depth1)
497	{
498		return ObjectData(getBaseShader(), getFullscreenQuadWithGradient(depth0, depth1));
499	}
500} // Utils
501
502// Shared base
503class BaseCase : public tcu::TestCase
504{
505public:
506	enum {RENDER_SIZE = 512};
507
508							BaseCase			(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
509	virtual					~BaseCase			(void) {}
510
511	virtual IterateResult	iterate				(void);
512
513protected:
514	void					logSamples			(const vector<Sample>& samples, const string& name, const string& desc);
515	void					logGeometry			(const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg);
516	virtual void			logAnalysis			(const vector<Sample>& samples) = 0;
517	virtual void			logDescription		(void) = 0;
518
519	virtual ObjectData		genOccluderGeometry	(void) const = 0;
520	virtual ObjectData		genOccludedGeometry	(void) const = 0;
521
522	virtual int				calibrate			(void) const = 0;
523	virtual Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const = 0;
524
525	void					render				(const RenderData& data) const;
526	void					render				(const RenderData& data, int instances) const;
527
528	const RenderContext&	m_renderCtx;
529	tcu::ResultCollector	m_results;
530
531	enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16};
532};
533
534BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
535	: TestCase		(testCtx, tcu::NODETYPE_PERFORMANCE, name, desc)
536	, m_renderCtx	(renderCtx)
537{
538}
539
// Runs the complete test in a single iteration:
//  1. sets up an offscreen RENDER_SIZE x RENDER_SIZE framebuffer (RGBA8 color + DEPTH24_STENCIL8 depth),
//  2. calibrates the maximum workload,
//  3. builds occluder/occluded render data and logs a reference image of the scene,
//  4. renders ITERATION_STEPS*ITERATION_SAMPLES samples in shuffled order,
//  5. logs the samples and the subclass specific analysis.
// Always returns STOP. Throws via TCU_CHECK if either program failed to build.
BaseCase::IterateResult BaseCase::iterate (void)
{
	typedef de::MovePtr<RenderData> RenderDataP;

	const glw::Functions&	gl					= m_renderCtx.getFunctions();
	TestLog&				log					= m_testCtx.getLog();

	// GL objects are owned by these wrappers for the duration of this function
	const glu::Framebuffer	framebuffer			(gl);
	const glu::Renderbuffer	renderbuffer		(gl);
	const glu::Renderbuffer	depthbuffer			(gl);

	vector<Sample>			results;
	vector<int>				params;
	RenderDataP				occluderData;
	RenderDataP				occludedData;
	tcu::TextureLevel		resultTex			(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE);
	int						maxWorkload			= 0;
	// Seed depends on the case name and the command line base seed
	de::Random				rng					(deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed());

	logDescription();

	// Allocate storage for the color and depth renderbuffers
	gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer);
	gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE);
	gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer);
	gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE);

	// Attach them and make the framebuffer current; it stays bound for calibration and sampling
	gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer);
	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer);
	gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer);
	gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE);
	gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f);

	maxWorkload = calibrate();

	// Setup data
	occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log));
	occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log));

	TCU_CHECK(occluderData->m_program.isOk());
	TCU_CHECK(occludedData->m_program.isOk());

	// Force initialization of GPU resources
	gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
	gl.enable(GL_DEPTH_TEST);

	// Render the scene once and read it back so the geometry can be shown in the log
	render(*occluderData);
	render(*occludedData);
	glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess());

	logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program);

	params.reserve(ITERATION_STEPS*ITERATION_SAMPLES);

	// Setup parameters
	for (int step = 0; step < ITERATION_STEPS; step++)
	{
		// Workloads are spread evenly over [0, maxWorkload); maxWorkload itself is never sampled
		const int workload = maxWorkload*step/ITERATION_STEPS;

		for (int count = 0; count < ITERATION_SAMPLES; count++)
			params.push_back(workload);
	}

	// Randomize the measurement order; the position is recorded in Sample::order
	rng.shuffle(params.begin(), params.end());

	// Render samples
	for (size_t ndx = 0; ndx < params.size(); ndx++)
	{
		const int	workload	= params[ndx];
		Sample		sample		= renderSample(*occluderData, *occludedData, workload);

		sample.workload = workload;
		sample.order = int(ndx);

		results.push_back(sample);
	}

	logSamples(results, "Samples", "Samples");
	logAnalysis(results);

	m_results.setTestContextResult(m_testCtx);

	return STOP;
}
623
624void BaseCase::logSamples (const vector<Sample>& samples, const string& name, const string& desc)
625{
626	TestLog& log = m_testCtx.getLog();
627
628	bool testOnly = true;
629
630	for (size_t ndx = 0; ndx < samples.size(); ndx++)
631	{
632		if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0)
633		{
634			testOnly = false;
635			break;
636		}
637	}
638
639	log << TestLog::SampleList(name, desc);
640
641	if (testOnly)
642	{
643		log << TestLog::SampleInfo
644			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
645			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
646			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
647			<< TestLog::EndSampleInfo;
648
649		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
650		{
651			const Sample& sample = samples[sampleNdx];
652
653			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample;
654		}
655	}
656	else
657	{
658		log << TestLog::SampleInfo
659			<< TestLog::ValueInfo("Workload",	"Workload",			"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
660			<< TestLog::ValueInfo("Order",		"Order of sample",	"",				QP_SAMPLE_VALUE_TAG_PREDICTOR)
661			<< TestLog::ValueInfo("TestTime",	"Test render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
662			<< TestLog::ValueInfo("NullTime",	"Read pixels time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
663			<< TestLog::ValueInfo("BaseTime",	"Base render time",	"us",			QP_SAMPLE_VALUE_TAG_RESPONSE)
664			<< TestLog::EndSampleInfo;
665
666		for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++)
667		{
668			const Sample& sample = samples[sampleNdx];
669
670			log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample;
671		}
672	}
673
674	log << TestLog::EndSampleList;
675}
676
677void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg)
678{
679	TestLog& log = m_testCtx.getLog();
680
681	log << TestLog::Section("Geometry", "Geometry");
682	log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage;
683	log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage;
684	log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage;
685
686	log << TestLog::Image("Test Geometry", "Test Geometry",  sample);
687	log << TestLog::EndSection;
688
689	log << TestLog::Section("Occluder", "Occluder");
690	log << occluderProg;
691	log << TestLog::EndSection;
692
693	log << TestLog::Section("Occluded", "Occluded");
694	log << occludedProg;
695	log << TestLog::EndSection;
696}
697
698void BaseCase::render (const RenderData& data) const
699{
700	const glw::Functions& gl = m_renderCtx.getFunctions();
701
702	gl.useProgram(data.m_program.getProgram());
703
704	gl.bindVertexArray(*data.m_vao);
705	gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices);
706	gl.bindVertexArray(0);
707}
708
709void BaseCase::render (const RenderData& data, int instances) const
710{
711	const glw::Functions& gl = m_renderCtx.getFunctions();
712
713	gl.useProgram(data.m_program.getProgram());
714
715	gl.bindVertexArray(*data.m_vao);
716	gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances);
717	gl.bindVertexArray(0);
718}
719
720// Render occluder once, then repeatedly render occluded geometry. Sample with multiple repetition counts & establish time per call with linear regression
721class RenderCountCase : public BaseCase
722{
723public:
724					RenderCountCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
725					~RenderCountCase	(void) {}
726
727protected:
728	virtual void	logAnalysis			(const vector<Sample>& samples);
729
730private:
731	virtual int		calibrate			(void) const;
732	virtual Sample	renderSample		(const RenderData& occluder, const RenderData& occluded, int callcount) const;
733};
734
735RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
736	: BaseCase	(testCtx, renderCtx, name, desc)
737{
738}
739
740void RenderCountCase::logAnalysis (const vector<Sample>& samples)
741{
742	using namespace gls;
743
744	TestLog&		log			= m_testCtx.getLog();
745	int				maxWorkload	= 0;
746	vector<Vec2>	testSamples	(samples.size());
747
748	for (size_t ndx = 0; ndx < samples.size(); ndx++)
749	{
750		const Sample& sample = samples[ndx];
751
752		testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
753
754		maxWorkload = de::max(maxWorkload, sample.workload);
755	}
756
757	{
758		const float							confidence	= 0.60f;
759		const LineParametersWithConfidence	testParam	= theilSenSiegelLinearRegression(testSamples, confidence);
760		const float							usPerCall	= testParam.coefficient;
761		const float							pxPerCall	= RENDER_SIZE*RENDER_SIZE;
762		const float							pxPerUs		= pxPerCall/usPerCall;
763		const float							mpxPerS		= pxPerUs;
764
765		log << TestLog::Section("Linear Regression", "Linear Regression");
766		log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
767		log << TestLog::Message << "Render time for scene with depth test was\n\t"
768			<< "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
769			<< "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
770			<< "us/workload" << TestLog::EndMessage;
771		log << TestLog::EndSection;
772
773		log << TestLog::Section("Result", "Result");
774
775		if (testParam.coefficientConfidenceLower < 0.0f)
776		{
777			log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage;
778			m_results.addResult(QP_TEST_RESULT_PASS, "Pass");
779		}
780		else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
781		{
782			log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
783			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
784		}
785		else
786		{
787			log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage;
788			m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2));
789		}
790
791		log << TestLog::EndSection;
792	}
793}
794
795Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const
796{
797	const glw::Functions&	gl		= m_renderCtx.getFunctions();
798	Sample					sample;
799	deUint64				now		= 0;
800	deUint64				prev	= 0;
801	deUint8					buffer[4];
802
803	// Stabilize
804	{
805		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
806		gl.enable(GL_DEPTH_TEST);
807		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
808	}
809
810	prev = deGetMicroseconds();
811
812	gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
813	gl.enable(GL_DEPTH_TEST);
814
815	render(occluder);
816	render(occluded, callcount);
817
818	gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
819
820	now = deGetMicroseconds();
821
822	sample.testTime = now - prev;
823	sample.baseTime = 0;
824	sample.nullTime = 0;
825	sample.workload = callcount;
826
827	return sample;
828}
829
830int RenderCountCase::calibrate (void) const
831{
832	using namespace gls;
833
834	const glw::Functions&	gl					= m_renderCtx.getFunctions();
835	TestLog&				log					= m_testCtx.getLog();
836
837	const RenderData		occluderGeometry	(genOccluderGeometry(), m_renderCtx, log);
838	const RenderData		occludedGeometry	(genOccludedGeometry(), m_renderCtx, log);
839
840	TheilSenCalibrator		calibrator			(CalibratorParameters(20, // Initial workload
841																	  10, // Max iteration frames
842																	  20.0f, // Iteration shortcut threshold ms
843																	  20, // Max iterations
844																	  33.0f, // Target frame time
845																	  40.0f, // Frame time cap
846																	  1000.0f // Target measurement duration
847																	  ));
848
849	while (true)
850	{
851		switch(calibrator.getState())
852		{
853			case TheilSenCalibrator::STATE_FINISHED:
854				logCalibrationInfo(m_testCtx.getLog(), calibrator);
855				return calibrator.getCallCount();
856
857			case TheilSenCalibrator::STATE_MEASURE:
858			{
859				deUint8	buffer[4];
860				deInt64 now;
861				deInt64 prev;
862
863				prev = deGetMicroseconds();
864
865				gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
866				gl.disable(GL_DEPTH_TEST);
867
868				render(occluderGeometry);
869				render(occludedGeometry, calibrator.getCallCount());
870
871				gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
872
873				now = deGetMicroseconds();
874
875				calibrator.recordIteration(now - prev);
876				break;
877			}
878
879			case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
880				calibrator.recomputeParameters();
881				break;
882			default:
883				DE_ASSERT(false);
884				return 1;
885		}
886	}
887}
888
889// Compares time/workload gradients of same geometry with and without depth testing
890class RelativeChangeCase : public BaseCase
891{
892public:
893					RelativeChangeCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
894	virtual			~RelativeChangeCase	(void) {}
895
896protected:
897	Sample			renderSample		(const RenderData& occluder, const RenderData& occluded, int workload) const;
898
899	virtual void	logAnalysis			(const vector<Sample>& samples);
900
901private:
902	int				calibrate			(void) const;
903};
904
905RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
906	: BaseCase		(testCtx, renderCtx, name, desc)
907{
908}
909
910int RelativeChangeCase::calibrate (void) const
911{
912	using namespace gls;
913
914	const glw::Functions&	gl		= m_renderCtx.getFunctions();
915	TestLog&				log		= m_testCtx.getLog();
916
917	const RenderData		geom	(genOccludedGeometry(), m_renderCtx, log);
918
919	TheilSenCalibrator calibrator(CalibratorParameters( 20, // Initial workload
920														10, // Max iteration frames
921														20.0f, // Iteration shortcut threshold ms
922														20, // Max iterations
923														10.0f, // Target frame time
924														15.0f, // Frame time cap
925														1000.0f // Target measurement duration
926														));
927
928	while (true)
929	{
930		switch(calibrator.getState())
931		{
932			case TheilSenCalibrator::STATE_FINISHED:
933				logCalibrationInfo(m_testCtx.getLog(), calibrator);
934				return calibrator.getCallCount();
935
936			case TheilSenCalibrator::STATE_MEASURE:
937			{
938				deUint8			buffer[4];
939				const GLuint	program	= geom.m_program.getProgram();
940
941				gl.useProgram(program);
942				gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount());
943
944				const deInt64 prev = deGetMicroseconds();
945
946				gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
947				gl.disable(GL_DEPTH_TEST);
948
949				render(geom);
950
951				gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
952
953				const deInt64 now = deGetMicroseconds();
954
955				calibrator.recordIteration(now - prev);
956				break;
957			}
958
959			case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS:
960				calibrator.recomputeParameters();
961				break;
962			default:
963				DE_ASSERT(false);
964				return 1;
965		}
966	}
967}
968
969Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const
970{
971	const glw::Functions&	gl		= m_renderCtx.getFunctions();
972	const GLuint			program	= occluded.m_program.getProgram();
973	Sample					sample;
974	deUint64				now		= 0;
975	deUint64				prev	= 0;
976	deUint8					buffer[4];
977
978	gl.useProgram(program);
979	gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
980
981	// Warmup (this workload seems to reduce variation in following workloads)
982	{
983		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
984		gl.disable(GL_DEPTH_TEST);
985
986		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
987	}
988
989	// Null time
990	{
991		prev = deGetMicroseconds();
992
993		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
994		gl.disable(GL_DEPTH_TEST);
995
996		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
997
998		now = deGetMicroseconds();
999
1000		sample.nullTime = now - prev;
1001	}
1002
1003	// Test time
1004	{
1005		prev = deGetMicroseconds();
1006
1007		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1008		gl.enable(GL_DEPTH_TEST);
1009
1010		render(occluder);
1011		render(occluded);
1012
1013		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1014
1015		now = deGetMicroseconds();
1016
1017		sample.testTime = now - prev;
1018	}
1019
1020	// Base time
1021	{
1022		prev = deGetMicroseconds();
1023
1024		gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1025		gl.disable(GL_DEPTH_TEST);
1026
1027		render(occluder);
1028		render(occluded);
1029
1030		gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1031
1032		now = deGetMicroseconds();
1033
1034		sample.baseTime = now - prev;
1035	}
1036
1037	sample.workload = 0;
1038
1039	return sample;
1040}
1041
1042void RelativeChangeCase::logAnalysis (const vector<Sample>& samples)
1043{
1044	using namespace gls;
1045
1046	TestLog&		log			= m_testCtx.getLog();
1047
1048	int				maxWorkload	= 0;
1049
1050	vector<Vec2>	nullSamples	(samples.size());
1051	vector<Vec2>	baseSamples	(samples.size());
1052	vector<Vec2>	testSamples	(samples.size());
1053
1054	for (size_t ndx = 0; ndx < samples.size(); ndx++)
1055	{
1056		const Sample& sample = samples[ndx];
1057
1058		nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime);
1059		baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime);
1060		testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime);
1061
1062		maxWorkload = de::max(maxWorkload, sample.workload);
1063	}
1064
1065	{
1066		const float							confidence	= 0.60f;
1067
1068		const LineParametersWithConfidence	nullParam	= theilSenSiegelLinearRegression(nullSamples, confidence);
1069		const LineParametersWithConfidence	baseParam	= theilSenSiegelLinearRegression(baseSamples, confidence);
1070		const LineParametersWithConfidence	testParam	= theilSenSiegelLinearRegression(testSamples, confidence);
1071
1072		if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper))
1073		{
1074			m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant");
1075			log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage;
1076		}
1077
1078		if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper))
1079		{
1080			m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration");
1081			log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage;
1082		}
1083
1084		log << TestLog::Section("Linear Regression", "Linear Regression");
1085		log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage;
1086
1087		log << TestLog::Message << "Render time for empty scene was\n\t"
1088			<< "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset <<  ", " << nullParam.offsetConfidenceUpper << "]us +"
1089			<< "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]"
1090			<< "us/workload" << TestLog::EndMessage;
1091
1092		log << TestLog::Message << "Render time for scene without depth test was\n\t"
1093			<< "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset <<  ", " << baseParam.offsetConfidenceUpper << "]us +"
1094			<< "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]"
1095			<< "us/workload" << TestLog::EndMessage;
1096
1097		log << TestLog::Message << "Render time for scene with depth test was\n\t"
1098			<< "[" << testParam.offsetConfidenceLower << ", " << testParam.offset <<  ", " << testParam.offsetConfidenceUpper << "]us +"
1099			<< "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]"
1100			<< "us/workload" << TestLog::EndMessage;
1101
1102		log << TestLog::EndSection;
1103
1104		if (de::inRange(0.0f, testParam.coefficientConfidenceLower, testParam.coefficientConfidenceUpper))
1105		{
1106			log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage;
1107			m_results.addResult(QP_TEST_RESULT_PASS, "0.0");
1108		}
1109		else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25)
1110		{
1111			log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage;
1112			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1113		}
1114		else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25)
1115		{
1116			log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage;
1117			m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low");
1118		}
1119		else
1120		{
1121			log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage;
1122			m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2));
1123		}
1124	}
1125}
1126
1127// Speed of trivial culling
1128class BaseCostCase : public RenderCountCase
1129{
1130public:
1131						BaseCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1132							: RenderCountCase (testCtx, renderCtx, name, desc) {}
1133
1134						~BaseCostCase		(void) {}
1135
1136private:
1137	virtual ObjectData	genOccluderGeometry	(void) const { return Utils::fastQuad(0.2f); }
1138	virtual ObjectData	genOccludedGeometry	(void) const { return Utils::variableQuad(0.8f); }
1139
1140	virtual void		logDescription		(void)
1141	{
1142		TestLog& log = m_testCtx.getLog();
1143
1144		log << TestLog::Section("Description", "Test description");
1145		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1146		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1147		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1148		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1149		log << TestLog::EndSection;
1150	}
1151};
1152
// Culling speed with tilted quads (depth gradient across the screen)
1154class GradientCostCase : public RenderCountCase
1155{
1156public:
1157						GradientCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance)
1158							: RenderCountCase		(testCtx, renderCtx, name, desc)
1159							, m_gradientDistance	(gradientDistance)
1160						{
1161						}
1162
1163						~GradientCostCase	(void) {}
1164
1165private:
1166	virtual ObjectData	genOccluderGeometry	(void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); }
1167	virtual ObjectData	genOccludedGeometry	(void) const
1168	{
1169		return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f));
1170	}
1171
1172	virtual void		logDescription		(void)
1173	{
1174		TestLog& log = m_testCtx.getLog();
1175
1176		log << TestLog::Section("Description", "Test description");
1177		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1178		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1179		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1180		log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." << TestLog::EndMessage;
1181		log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage;
1182		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1183		log << TestLog::EndSection;
1184	}
1185
1186	const float			m_gradientDistance;
1187};
1188
1189// Constant offset to frag depth in occluder
1190class OccluderStaticFragDepthCostCase : public RenderCountCase
1191{
1192public:
1193						OccluderStaticFragDepthCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1194							: RenderCountCase(testCtx, renderCtx, name, desc)
1195						{
1196						}
1197
1198						~OccluderStaticFragDepthCostCase	(void) {}
1199
1200private:
1201	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1202	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::fastQuad(0.8f); }
1203
1204	virtual void		logDescription						(void)
1205	{
1206		TestLog& log = m_testCtx.getLog();
1207
1208		log << TestLog::Section("Description", "Test description");
1209		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1210		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1211		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1212		log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1213		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1214		log << TestLog::EndSection;
1215	}
1216};
1217
1218// Dynamic offset to frag depth in occluder
1219class OccluderDynamicFragDepthCostCase : public RenderCountCase
1220{
1221public:
1222						OccluderDynamicFragDepthCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1223							: RenderCountCase(testCtx, renderCtx, name, desc)
1224						{
1225						}
1226
1227						~OccluderDynamicFragDepthCostCase	(void) {}
1228
1229private:
1230	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1231	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::fastQuad(0.8f); }
1232
1233	virtual void		logDescription						(void)
1234	{
1235		TestLog& log = m_testCtx.getLog();
1236
1237		log << TestLog::Section("Description", "Test description");
1238		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1239		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1240		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1241		log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1242		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1243		log << TestLog::EndSection;
1244	}
1245};
1246
// Constant offset to frag depth in occluded
1248class OccludedStaticFragDepthCostCase : public RenderCountCase
1249{
1250public:
1251						OccludedStaticFragDepthCostCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1252							: RenderCountCase(testCtx, renderCtx, name, desc)
1253						{
1254						}
1255
1256						~OccludedStaticFragDepthCostCase	(void) {}
1257
1258private:
1259	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1260	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1261
1262	virtual void		logDescription						(void)
1263	{
1264		TestLog& log = m_testCtx.getLog();
1265
1266		log << TestLog::Section("Description", "Test description");
1267		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1268		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1269		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1270		log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1271		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1272		log << TestLog::EndSection;
1273	}
1274};
1275
// Dynamic offset to frag depth in occluded
1277class OccludedDynamicFragDepthCostCase : public RenderCountCase
1278{
1279public:
1280						OccludedDynamicFragDepthCostCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1281							: RenderCountCase(testCtx, renderCtx, name, desc)
1282						{
1283						}
1284
1285						~OccludedDynamicFragDepthCostCase	(void) {}
1286
1287private:
1288	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1289	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1290
1291	virtual void		logDescription						(void)
1292	{
1293		TestLog& log = m_testCtx.getLog();
1294
1295		log << TestLog::Section("Description", "Test description");
1296		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1297		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage;
1298		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage;
1299		log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1300		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1301		log << TestLog::EndSection;
1302	}
1303};
1304
1305// Culling speed with slightly less trivial geometry
1306class OccludingGeometryComplexityCostCase : public RenderCountCase
1307{
1308public:
1309						OccludingGeometryComplexityCostCase		(TestContext&			testCtx,
1310																 const RenderContext&	renderCtx,
1311																 const char*			name,
1312																 const char*			desc,
1313																 int					resolution,
1314																 float					xyNoise,
1315																 float					zNoise)
1316							: RenderCountCase	(testCtx, renderCtx, name, desc)
1317							, m_resolution		(resolution)
1318							, m_xyNoise			(xyNoise)
1319							, m_zNoise			(zNoise)
1320						{
1321						}
1322
1323						~OccludingGeometryComplexityCostCase	(void) {}
1324
1325private:
1326	virtual ObjectData	genOccluderGeometry						(void) const
1327	{
1328		return ObjectData(Utils::getBaseShader(),
1329						  Utils::getFullScreenGrid(m_resolution,
1330						  deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(),
1331						  0.2f,
1332						  m_zNoise,
1333						  m_xyNoise));
1334	}
1335
1336	virtual ObjectData	genOccludedGeometry						(void) const { return Utils::variableQuad(0.8f); }
1337
1338	virtual void		logDescription		(void)
1339	{
1340		TestLog& log = m_testCtx.getLog();
1341
1342		log << TestLog::Section("Description", "Test description");
1343		log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage;
1344		log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage;
1345		log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered"  << TestLog::EndMessage;
1346		log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload"  << TestLog::EndMessage;
1347		log << TestLog::EndSection;
1348	}
1349
1350	const int			m_resolution;
1351	const float			m_xyNoise;
1352	const float			m_zNoise;
1353};
1354
1355
1356// Cases with varying workloads in the fragment shader
1357class FragmentWorkloadCullCase : public RelativeChangeCase
1358{
1359public:
1360						FragmentWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1361	virtual				~FragmentWorkloadCullCase	(void) {}
1362
1363private:
1364	virtual ObjectData	genOccluderGeometry			(void) const { return Utils::fastQuad(0.2f); }
1365
1366	virtual void		logDescription				(void);
1367};
1368
1369FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1370	: RelativeChangeCase	(testCtx, renderCtx, name, desc)
1371{
1372}
1373
1374void FragmentWorkloadCullCase::logDescription (void)
1375{
1376	TestLog& log = m_testCtx.getLog();
1377
1378	log << TestLog::Section("Description", "Test description");
1379	log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1380	log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1381		"the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1382	log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1383	log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1384	log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1385	log << TestLog::EndSection;
1386}
1387
1388// Additional workload consists of texture lookups
1389class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase
1390{
1391public:
1392						FragmentTextureWorkloadCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc);
1393	virtual 			~FragmentTextureWorkloadCullCase	(void) {}
1394
1395	virtual void		init								(void);
1396	virtual void		deinit								(void);
1397
1398private:
1399	typedef MovePtr<glu::Texture> TexPtr;
1400
1401	virtual ObjectData	genOccludedGeometry					(void) const
1402	{
1403		return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1404	}
1405
1406	TexPtr				m_texture;
1407};
1408
1409FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1410	: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1411{
1412}
1413
1414void FragmentTextureWorkloadCullCase::init (void)
1415{
1416	const glw::Functions&	gl		= m_renderCtx.getFunctions();
1417	const int				size	= 128;
1418	const vector<deUint8>	data	(size*size*4, 255);
1419
1420	m_texture = MovePtr<glu::Texture>(new glu::Texture(gl));
1421
1422	gl.bindTexture(GL_TEXTURE_2D, m_texture);
1423	gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
1424	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1425	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1426}
1427
// Counterpart of init(): clears the owning pointer to the texture wrapper.
void FragmentTextureWorkloadCullCase::deinit (void)
{
	m_texture.clear();
}
1432
1433// Additional workload consists of arithmetic
1434class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1435{
1436public:
1437						FragmentArithmeticWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1438						: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1439					{
1440					}
1441	virtual				~FragmentArithmeticWorkloadCullCase	(void) {}
1442
1443private:
1444	virtual ObjectData	genOccludedGeometry					(void) const
1445	{
1446		return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f));
1447	}
1448};
1449
// Contains dynamically unused discard after a series of calculations
1451class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase
1452{
1453public:
1454						FragmentDiscardArithmeticWorkloadCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1455						: FragmentWorkloadCullCase	(testCtx, renderCtx, name, desc)
1456					{
1457					}
1458
1459	virtual				~FragmentDiscardArithmeticWorkloadCullCase	(void) {}
1460
1461private:
1462	virtual ObjectData	genOccludedGeometry							(void) const
1463	{
1464		return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f));
1465	}
1466
1467	virtual void		logDescription								(void)
1468	{
1469		TestLog& log = m_testCtx.getLog();
1470
1471		log << TestLog::Section("Description", "Test description");
1472		log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage;
1473		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader,"
1474			"the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage;
1475		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1476		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1477		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1478		log << TestLog::EndSection;
1479	}
1480};
1481
1482// Discards fragments from the occluder in a grid pattern
1483class PartialOccluderDiscardCullCase : public RelativeChangeCase
1484{
1485public:
1486						PartialOccluderDiscardCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize)
1487							: RelativeChangeCase		(testCtx, renderCtx, name, desc)
1488							, m_gridsize	(gridsize)
1489						{
1490						}
1491	virtual				~PartialOccluderDiscardCullCase	(void) {}
1492
1493private:
1494	virtual ObjectData	genOccluderGeometry				(void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); }
1495	virtual ObjectData	genOccludedGeometry				(void) const { return Utils::slowQuad(0.8f); }
1496
1497	virtual void		logDescription					(void)
1498	{
1499		TestLog& log = m_testCtx.getLog();
1500
1501		log << TestLog::Section("Description", "Test description");
1502		log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage;
1503		log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad discards half the "
1504			"fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage;
1505		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1506		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1507		log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time"  << TestLog::EndMessage;
1508		log << TestLog::EndSection;
1509	}
1510
1511	const int			m_gridsize;
1512};
1513
1514// Trivial occluder covering part of screen
1515class PartialOccluderCullCase : public RelativeChangeCase
1516{
1517public:
1518						PartialOccluderCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage)
1519							: RelativeChangeCase		(testCtx, renderCtx, name, desc)
1520							, m_coverage	(coverage)
1521						{
1522						}
1523						~PartialOccluderCullCase	(void) {}
1524
1525private:
1526	virtual ObjectData	genOccluderGeometry			(void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); }
1527	virtual ObjectData	genOccludedGeometry			(void) const {return Utils::slowQuad(0.8f); }
1528
1529	virtual void		logDescription				(void)
1530	{
1531		TestLog& log = m_testCtx.getLog();
1532
1533		log << TestLog::Section("Description", "Test description");
1534		log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage;
1535		log << TestLog::Message << "Geometry consists of two quads. The first (occluding) quad covers " << m_coverage*100.0f
1536			<< "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage;
1537		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1538		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1539		log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area"  << TestLog::EndMessage;
1540		log << TestLog::EndSection;
1541	}
1542
1543	const float			m_coverage;
1544};
1545
1546// Constant offset to frag depth in occluder
1547class StaticOccluderFragDepthCullCase : public RelativeChangeCase
1548{
1549public:
1550						StaticOccluderFragDepthCullCase		(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1551							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1552						{
1553						}
1554
1555						~StaticOccluderFragDepthCullCase	(void) {}
1556
1557private:
1558	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); }
1559	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::slowQuad(0.8f); }
1560
1561	virtual void		logDescription						(void)
1562	{
1563		TestLog& log = m_testCtx.getLog();
1564
1565		log << TestLog::Section("Description", "Test description");
1566		log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1567		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1568		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1569		log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1570		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1571		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1572		log << TestLog::EndSection;
1573	}
1574};
1575
1576// Dynamic offset to frag depth in occluder
1577class DynamicOccluderFragDepthCullCase : public RelativeChangeCase
1578{
1579public:
1580						DynamicOccluderFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1581							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1582						{
1583						}
1584
1585						~DynamicOccluderFragDepthCullCase	(void) {}
1586
1587private:
1588	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); }
1589	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::slowQuad(0.8f); }
1590
1591	virtual void		logDescription						(void)
1592	{
1593		TestLog& log = m_testCtx.getLog();
1594
1595		log << TestLog::Section("Description", "Test description");
1596		log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage;
1597		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1598		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1599		log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1600		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1601		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1602		log << TestLog::EndSection;
1603	}
1604};
1605
1606// Constant offset to frag depth in occluded
1607class StaticOccludedFragDepthCullCase : public RelativeChangeCase
1608{
1609public:
1610						StaticOccludedFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1611							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1612						{
1613						}
1614
1615						~StaticOccludedFragDepthCullCase	(void) {}
1616
1617private:
1618	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1619	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1620
1621	virtual void		logDescription						(void)
1622	{
1623		TestLog& log = m_testCtx.getLog();
1624
1625		log << TestLog::Section("Description", "Test description");
1626		log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1627		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1628		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1629		log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage;
1630		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1631		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1632		log << TestLog::EndSection;
1633	}
1634};
1635
1636// Dynamic offset to frag depth in occluded
1637class DynamicOccludedFragDepthCullCase : public RelativeChangeCase
1638{
1639public:
1640						DynamicOccludedFragDepthCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1641							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1642						{
1643						}
1644
1645						~DynamicOccludedFragDepthCullCase	(void) {}
1646
1647private:
1648	virtual ObjectData	genOccluderGeometry					(void) const { return Utils::fastQuad(0.2f); }
1649	virtual ObjectData	genOccludedGeometry					(void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); }
1650
1651	virtual void		logDescription						(void)
1652	{
1653		TestLog& log = m_testCtx.getLog();
1654
1655		log << TestLog::Section("Description", "Test description");
1656		log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage;
1657		log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1658		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1659		log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage;
1660		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1661		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1662		log << TestLog::EndSection;
1663	}
1664};
1665
// Occluded geometry is rendered before the occluder (reversed rendering order)
1667class ReversedDepthOrderCullCase : public RelativeChangeCase
1668{
1669public:
1670						ReversedDepthOrderCullCase	(TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc)
1671							: RelativeChangeCase(testCtx, renderCtx, name, desc)
1672						{
1673						}
1674
1675						~ReversedDepthOrderCullCase	(void) {}
1676
1677private:
1678	virtual ObjectData	genOccluderGeometry			(void) const { return Utils::fastQuad(0.2f); }
1679	virtual ObjectData	genOccludedGeometry			(void) const { return Utils::slowQuad(0.8f); }
1680
1681	virtual void		logDescription				(void)
1682	{
1683		TestLog& log = m_testCtx.getLog();
1684
1685		log << TestLog::Section("Description", "Test description");
1686		log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage;
1687		log << TestLog::Message << "Geometry consists of two fullscreen quads. The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage;
1688		log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader"  << TestLog::EndMessage;
1689		log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared"  << TestLog::EndMessage;
1690		log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time"  << TestLog::EndMessage;
1691		log << TestLog::EndSection;
1692	}
1693
1694	// Rendering order of occluder & occluded is reversed, otherwise identical to parent version
1695	Sample				renderSample				(const RenderData& occluder, const RenderData& occluded, int workload) const
1696	{
1697		const glw::Functions&	gl		= m_renderCtx.getFunctions();
1698		const GLuint			program	= occluded.m_program.getProgram();
1699		Sample					sample;
1700		deUint64				now		= 0;
1701		deUint64				prev	= 0;
1702		deUint8					buffer[4];
1703
1704		gl.useProgram(program);
1705		gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload);
1706
1707		// Warmup (this workload seems to reduce variation in following workloads)
1708		{
1709			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1710			gl.disable(GL_DEPTH_TEST);
1711
1712			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1713		}
1714
1715		// Null time
1716		{
1717			prev = deGetMicroseconds();
1718
1719			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1720			gl.disable(GL_DEPTH_TEST);
1721
1722			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1723
1724			now = deGetMicroseconds();
1725
1726			sample.nullTime = now - prev;
1727		}
1728
1729		// Test time
1730		{
1731			prev = deGetMicroseconds();
1732
1733			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1734			gl.enable(GL_DEPTH_TEST);
1735
1736			render(occluded);
1737			render(occluder);
1738
1739			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1740
1741			now = deGetMicroseconds();
1742
1743			sample.testTime = now - prev;
1744		}
1745
1746		// Base time
1747		{
1748			prev = deGetMicroseconds();
1749
1750			gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
1751			gl.disable(GL_DEPTH_TEST);
1752
1753			render(occluded);
1754			render(occluder);
1755
1756			gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer);
1757
1758			now = deGetMicroseconds();
1759
1760			sample.baseTime = now - prev;
1761		}
1762
1763		sample.workload = 0;
1764
1765		return sample;
1766	}
1767};
1768
1769} // Anonymous
1770
1771DepthTests::DepthTests (Context& context)
1772	: TestCaseGroup (context, "depth", "Depth culling performance")
1773{
1774}
1775
1776void DepthTests::init (void)
1777{
1778	TestContext&			testCtx		= m_context.getTestContext();
1779	const RenderContext&	renderCtx	= m_context.getRenderContext();
1780
1781	{
1782		tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency");
1783
1784		addChild(cullEfficiencyGroup);
1785
1786		{
1787			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload");
1788
1789			cullEfficiencyGroup->addChild(group);
1790
1791			group->addChild(new FragmentTextureWorkloadCullCase(			testCtx, renderCtx, "workload_texture",				"Fragment shader with texture lookup workload"));
1792			group->addChild(new FragmentArithmeticWorkloadCullCase(			testCtx, renderCtx, "workload_arithmetic",			"Fragment shader with arithmetic workload"));
1793			group->addChild(new FragmentDiscardArithmeticWorkloadCullCase(	testCtx, renderCtx, "workload_arithmetic_discard",	"Fragment shader that may discard with arithmetic workload"));
1794		}
1795
1796		{
1797			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard");
1798
1799			cullEfficiencyGroup->addChild(group);
1800
1801			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256",	"Parts of occluder geometry discarded", 256));
1802			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128",	"Parts of occluder geometry discarded", 128));
1803			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64",	"Parts of occluder geometry discarded", 64));
1804			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32",	"Parts of occluder geometry discarded", 32));
1805			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16",	"Parts of occluder geometry discarded", 16));
1806			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8",	"Parts of occluder geometry discarded", 8));
1807			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4",	"Parts of occluder geometry discarded", 4));
1808			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2",	"Parts of occluder geometry discarded", 2));
1809			group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1",	"Parts of occluder geometry discarded", 1));
1810		}
1811
1812		{
1813			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage");
1814
1815			cullEfficiencyGroup->addChild(group);
1816
1817			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f));
1818			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f));
1819			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f));
1820			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f));
1821			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering only part of occluded geometry", 0.80f));
1822			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f));
1823			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f));
1824			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f));
1825			group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f));
1826		}
1827
1828		{
1829			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage");
1830
1831			cullEfficiencyGroup->addChild(group);
1832
1833			group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", ""));
1834			group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", ""));
1835			group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", ""));
1836			group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", ""));
1837		}
1838
1839		{
1840			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order");
1841
1842			cullEfficiencyGroup->addChild(group);
1843
1844			group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order"));
1845		}
1846	}
1847
1848	{
1849		tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency");
1850
1851		addChild(testCostGroup);
1852
1853		{
1854			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences");
1855
1856			testCostGroup->addChild(group);
1857
1858			group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", ""));
1859			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_050", "", 0.50f));
1860			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f));
1861			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f));
1862			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f));
1863			group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f));
1864		}
1865
1866		{
1867			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity");
1868
1869			testCostGroup->addChild(group);
1870
1871			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5",   "", 5,   0.0f, 0.0f));
1872			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15",  "", 15,  0.0f, 0.0f));
1873			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25",  "", 25,  0.0f, 0.0f));
1874			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50",  "", 50,  0.0f, 0.0f));
1875			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f));
1876
1877			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.0f));
1878			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.0f));
1879			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.0f));
1880			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.0f));
1881			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f));
1882
1883			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_5",   "", 5,   0.0f, 0.2f));
1884			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15",  "", 15,  0.0f, 0.2f));
1885			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25",  "", 25,  0.0f, 0.2f));
1886			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50",  "", 50,  0.0f, 0.2f));
1887			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f));
1888
1889			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5",   "", 5,   1.0f/5.0f,   0.2f));
1890			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15",  "", 15,  1.0f/15.0f,  0.2f));
1891			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25",  "", 25,  1.0f/25.0f,  0.2f));
1892			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50",  "", 50,  1.0f/50.0f,  0.2f));
1893			group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f));
1894		}
1895
1896		{
1897			tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth");
1898
1899			testCostGroup->addChild(group);
1900
1901			group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", ""));
1902			group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", ""));
1903			group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", ""));
1904			group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", ""));
1905		}
1906	}
1907}
1908
1909} // Performance
1910} // gles3
1911} // deqp
1912