1/*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.0 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Depth buffer performance tests. 22 *//*--------------------------------------------------------------------*/ 23 24#include "es3pDepthTests.hpp" 25 26#include "glsCalibration.hpp" 27 28#include "gluShaderProgram.hpp" 29#include "gluObjectWrapper.hpp" 30#include "gluPixelTransfer.hpp" 31 32#include "glwFunctions.hpp" 33#include "glwEnums.hpp" 34 35#include "tcuTestLog.hpp" 36#include "tcuStringTemplate.hpp" 37#include "tcuCPUWarmup.hpp" 38#include "tcuCommandLine.hpp" 39#include "tcuResultCollector.hpp" 40 41#include "deClock.h" 42#include "deString.h" 43#include "deMath.h" 44#include "deStringUtil.hpp" 45#include "deRandom.hpp" 46#include "deUniquePtr.hpp" 47 48#include <vector> 49#include <algorithm> 50 51namespace deqp 52{ 53namespace gles3 54{ 55namespace Performance 56{ 57namespace 58{ 59using namespace glw; 60using de::MovePtr; 61using tcu::TestContext; 62using tcu::TestLog; 63using tcu::Vec4; 64using tcu::Vec3; 65using tcu::Vec2; 66using glu::RenderContext; 67using glu::ProgramSources; 68using glu::ShaderSource; 69using std::vector; 70using std::string; 71using std::map; 72 73struct Sample 74{ 75 deInt64 nullTime; 76 deInt64 
baseTime; 77 deInt64 testTime; 78 int order; 79 int workload; 80}; 81 82struct SampleParams 83{ 84 int step; 85 int measurement; 86 87 SampleParams(int step_, int measurement_) : step(step_), measurement(measurement_) {} 88}; 89 90typedef vector<float> Geometry; 91 92struct ObjectData 93{ 94 ProgramSources shader; 95 Geometry geometry; 96 97 ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {} 98}; 99 100class RenderData 101{ 102public: 103 RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log); 104 ~RenderData (void) {}; 105 106 const glu::ShaderProgram m_program; 107 const glu::VertexArray m_vao; 108 const glu::Buffer m_vbo; 109 110 const int m_numVertices; 111}; 112 113RenderData::RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log) 114 : m_program (renderCtx, object.shader) 115 , m_vao (renderCtx.getFunctions()) 116 , m_vbo (renderCtx.getFunctions()) 117 , m_numVertices (int(object.geometry.size())/4) 118{ 119 const glw::Functions& gl = renderCtx.getFunctions(); 120 121 if (!m_program.isOk()) 122 log << m_program; 123 124 gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo); 125 gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW); 126 gl.bindAttribLocation(m_program.getProgram(), 0, "a_position"); 127 128 gl.bindVertexArray(*m_vao); 129 gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); 130 gl.enableVertexAttribArray(0); 131 gl.bindVertexArray(0); 132} 133 134namespace Utils 135{ 136 vector<float> getFullscreenQuad (float depth) 137 { 138 const float data[] = 139 { 140 +1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own 141 +1.0f, -1.0f, depth, 1.0f, 142 -1.0f, -1.0f, depth, 2.0f, 143 -1.0f, -1.0f, depth, 0.0f, 144 -1.0f, +1.0f, depth, 1.0f, 145 +1.0f, +1.0f, depth, 2.0f, 146 }; 147 148 return vector<float>(DE_ARRAY_BEGIN(data), 
DE_ARRAY_END(data)); 149 } 150 151 vector<float> getFullscreenQuadWithGradient (float depth0, float depth1) 152 { 153 const float data[] = 154 { 155 +1.0f, +1.0f, depth0, 0.0f, 156 +1.0f, -1.0f, depth0, 1.0f, 157 -1.0f, -1.0f, depth1, 2.0f, 158 -1.0f, -1.0f, depth1, 0.0f, 159 -1.0f, +1.0f, depth1, 1.0f, 160 +1.0f, +1.0f, depth0, 2.0f, 161 }; 162 163 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); 164 } 165 166 vector<float> getPartScreenQuad (float coverage, float depth) 167 { 168 const float xMax = -1.0f + 2.0f*coverage; 169 const float data[] = 170 { 171 xMax, +1.0f, depth, 0.0f, 172 xMax, -1.0f, depth, 1.0f, 173 -1.0f, -1.0f, depth, 2.0f, 174 -1.0f, -1.0f, depth, 0.0f, 175 -1.0f, +1.0f, depth, 1.0f, 176 xMax, +1.0f, depth, 2.0f, 177 }; 178 179 return vector<float>(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); 180 } 181 182 // Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise 183 vector<float> getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise) 184 { 185 const int gridsize = resolution+1; 186 vector<Vec3> vertices (gridsize*gridsize); 187 vector<float> retval; 188 de::Random rng (seed); 189 190 for (int y = 0; y < gridsize; y++) 191 for (int x = 0; x < gridsize; x++) 192 { 193 const bool isEdge = x == 0 || y == 0 || x == resolution || y == resolution; 194 const float x_ = float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); 195 const float y_ = float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 
0.0f : rng.getFloat(-xyNoise, +xyNoise)); 196 const float z_ = baseDepth + rng.getFloat(-depthNoise, +depthNoise); 197 198 vertices[y*gridsize + x] = Vec3(x_, y_, z_); 199 } 200 201 retval.reserve(resolution*resolution*6); 202 203 for (int y = 0; y < resolution; y++) 204 for (int x = 0; x < resolution; x++) 205 { 206 const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)]; 207 const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)]; 208 const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)]; 209 const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)]; 210 211 const float temp[6*4] = 212 { 213 p0.x(), p0.y(), p0.z(), 0.0f, 214 p2.x(), p2.y(), p2.z(), 1.0f, 215 p1.x(), p1.y(), p1.z(), 2.0f, 216 217 p3.x(), p3.y(), p3.z(), 0.0f, 218 p1.x(), p1.y(), p1.z(), 1.0f, 219 p2.x(), p2.y(), p2.z(), 2.0f, 220 }; 221 222 retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp)); 223 } 224 225 return retval; 226 } 227 228 // Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader 229 string getBaseVertexShader (void) 230 { 231 return "#version 300 es\n" 232 "in highp vec4 a_position;\n" 233 "out mediump vec3 v_bcoords;\n" 234 "void main()\n" 235 "{\n" 236 " v_bcoords = vec3(0, 0, 0);\n" 237 " v_bcoords[int(a_position.w)] = 1.0;\n" 238 " gl_Position = vec4(a_position.xyz, 1.0);\n" 239 "}\n"; 240 } 241 242 // Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords 243 string getInstanceNoiseVertexShader (void) 244 { 245 return "#version 300 es\n" 246 "in highp vec4 a_position;\n" 247 "out mediump vec3 v_bcoords;\n" 248 "void main()\n" 249 "{\n" 250 " v_bcoords = vec3(0, 0, 0);\n" 251 " v_bcoords[int(a_position.w)] = 1.0;\n" 252 " vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n" 253 " gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n" 254 "}\n"; 255 } 256 257 // Renders green triangles with edges highlighted. Exact shade depends on depth. 
string getDepthAsGreenFragmentShader (void)
{
	// Interior fragments are (0, depth, 0), fragments near a triangle edge are (depth, 1, depth)
	const char* const source =
		"#version 300 es\n"
		"in mediump vec3 v_bcoords;\n"
		"out mediump vec4 fragColor;\n"
		"void main()\n"
		"{\n"
		" mediump float d = gl_FragCoord.z;\n"
		" if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		" fragColor = vec4(d,1,d,1);\n"
		" else\n"
		" fragColor = vec4(0,d,0,1);\n"
		"}\n";

	return string(source);
}

// Renders red triangles with edges highlighted. Exact shade depends on depth.
string getDepthAsRedFragmentShader (void)
{
	// Interior fragments are (depth, 0, 0), fragments near a triangle edge are (1, depth, depth)
	const char* const source =
		"#version 300 es\n"
		"in mediump vec3 v_bcoords;\n"
		"out mediump vec4 fragColor;\n"
		"void main()\n"
		"{\n"
		" mediump float d = gl_FragCoord.z;\n"
		" if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		" fragColor = vec4(1,d,d,1);\n"
		" else\n"
		" fragColor = vec4(d,0,0,1);\n"
		"}\n";

	return string(source);
}

// Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
// The amount of work per fragment is controlled with the u_iterations uniform.
string getArithmeticWorkloadFragmentShader (void)
{
	const char* const source =
		"#version 300 es\n"
		"in mediump vec3 v_bcoords;\n"
		"out mediump vec4 fragColor;\n"
		"uniform mediump int u_iterations;\n"
		"void main()\n"
		"{\n"
		" mediump float d = gl_FragCoord.z;\n"
		" for (int i = 0; i<u_iterations; i++)\n"
		// cos(a)^2 + sin(a)^2 == 1. Since d is in range [0,1] this will lose a few ULP's of precision per
		// iteration but should not significantly change the value of d without extreme iteration counts
		" d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
		" if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		" fragColor = vec4(1,d,d,1);\n"
		" else\n"
		" fragColor = vec4(d,0,0,1);\n"
		"}\n";

	return string(source);
}

// Arithmetic workload shader that additionally discards fragments whose computed d is below 0.5
string getArithmeticWorkloadDiscardFragmentShader (void)
{
	const char* const source =
		"#version 300 es\n"
		"in mediump vec3 v_bcoords;\n"
		"out mediump vec4 fragColor;\n"
		"uniform mediump int u_iterations;\n"
		"void main()\n"
		"{\n"
		" mediump float d = gl_FragCoord.z;\n"
		" for (int i = 0; i<u_iterations; i++)\n"
		" d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n"
		" if (d < 0.5) discard;\n"
		" if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n"
		" fragColor = vec4(1,d,d,1);\n"
		" else\n"
		" fragColor = vec4(d,0,0,1);\n"
		"}\n";

	return string(source);
}

// Texture fetch based time waster. Renders red triangles with edges highlighted. Exact shade depends on depth.
331 string getTextureWorkloadFragmentShader (void) 332 { 333 return "#version 300 es\n" 334 "in mediump vec3 v_bcoords;\n" 335 "out mediump vec4 fragColor;\n" 336 "uniform mediump int u_iterations;\n" 337 "uniform sampler2D u_texture;\n" 338 "void main()\n" 339 "{\n" 340 " mediump float d = gl_FragCoord.z;\n" 341 " for (int i = 0; i<u_iterations; i++)\n" 342 " d *= texture(u_texture, (gl_FragCoord.xy+vec2(i))/512.0).r;\n" // Texture is expected to be fully white 343 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 344 " fragColor = vec4(1,1,1,1);\n" 345 " else\n" 346 " fragColor = vec4(d,0,0,1);\n" 347 "}\n"; 348 } 349 350 // Discard fragments in a grid pattern 351 string getGridDiscardFragmentShader (int gridsize) 352 { 353 const string fragSrc = "#version 300 es\n" 354 "in mediump vec3 v_bcoords;\n" 355 "out mediump vec4 fragColor;\n" 356 "void main()\n" 357 "{\n" 358 " mediump float d = gl_FragCoord.z;\n" 359 " if ((int(gl_FragCoord.x)/${GRIDRENDER_SIZE} + int(gl_FragCoord.y)/${GRIDRENDER_SIZE})%2 == 0)\n" 360 " discard;\n" 361 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 362 " fragColor = vec4(d,1,d,1);\n" 363 " else\n" 364 " fragColor = vec4(0,d,0,1);\n" 365 "}\n"; 366 map<string, string> params; 367 368 params["GRIDRENDER_SIZE"] = de::toString(gridsize); 369 370 return tcu::StringTemplate(fragSrc).specialize(params); 371 } 372 373 // A static increment to frag depth 374 string getStaticFragDepthFragmentShader (void) 375 { 376 return "#version 300 es\n" 377 "in mediump vec3 v_bcoords;\n" 378 "out mediump vec4 fragColor;\n" 379 "void main()\n" 380 "{\n" 381 " mediump float d = gl_FragCoord.z;\n" 382 " gl_FragDepth = gl_FragCoord.z + 0.1;\n" 383 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 384 " fragColor = vec4(d,1,d,1);\n" 385 " else\n" 386 " fragColor = vec4(0,d,0,1);\n" 387 "}\n"; 388 } 389 390 // A trivial dynamic change to frag depth 391 string getDynamicFragDepthFragmentShader 
(void) 392 { 393 return "#version 300 es\n" 394 "in mediump vec3 v_bcoords;\n" 395 "out mediump vec4 fragColor;\n" 396 "void main()\n" 397 "{\n" 398 " mediump float d = gl_FragCoord.z;\n" 399 " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1 400 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 401 " fragColor = vec4(d,1,d,1);\n" 402 " else\n" 403 " fragColor = vec4(0,d,0,1);\n" 404 "}\n"; 405 } 406 407 // A static increment to frag depth 408 string getStaticFragDepthArithmeticWorkloadFragmentShader (void) 409 { 410 return "#version 300 es\n" 411 "in mediump vec3 v_bcoords;\n" 412 "out mediump vec4 fragColor;\n" 413 "uniform mediump int u_iterations;\n" 414 "void main()\n" 415 "{\n" 416 " mediump float d = gl_FragCoord.z;\n" 417 " gl_FragDepth = gl_FragCoord.z + 0.1;\n" 418 " for (int i = 0; i<u_iterations; i++)\n" 419 " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n" 420 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 421 " fragColor = vec4(1,d,d,1);\n" 422 " else\n" 423 " fragColor = vec4(d,0,0,1);\n" 424 "}\n"; 425 } 426 427 // A trivial dynamic change to frag depth 428 string getDynamicFragDepthArithmeticWorkloadFragmentShader (void) 429 { 430 return "#version 300 es\n" 431 "in mediump vec3 v_bcoords;\n" 432 "out mediump vec4 fragColor;\n" 433 "uniform mediump int u_iterations;\n" 434 "void main()\n" 435 "{\n" 436 " mediump float d = gl_FragCoord.z;\n" 437 " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1 438 " for (int i = 0; i<u_iterations; i++)\n" 439 " d = d*sin(d)*sin(d) + d*cos(d)*cos(d);\n" 440 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" 441 " fragColor = vec4(1,d,d,1);\n" 442 " else\n" 443 " fragColor = vec4(d,0,0,1);\n" 444 "}\n"; 445 } 446 447 glu::ProgramSources getBaseShader (void) 448 { 449 return 
glu::makeVtxFragSources(getBaseVertexShader(), getDepthAsGreenFragmentShader()); 450 } 451 452 glu::ProgramSources getArithmeticWorkloadShader (void) 453 { 454 return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadFragmentShader()); 455 } 456 457 glu::ProgramSources getArithmeticWorkloadDiscardShader (void) 458 { 459 return glu::makeVtxFragSources(getBaseVertexShader(), getArithmeticWorkloadDiscardFragmentShader()); 460 } 461 462 glu::ProgramSources getTextureWorkloadShader (void) 463 { 464 return glu::makeVtxFragSources(getBaseVertexShader(), getTextureWorkloadFragmentShader()); 465 } 466 467 glu::ProgramSources getGridDiscardShader (int gridsize) 468 { 469 return glu::makeVtxFragSources(getBaseVertexShader(), getGridDiscardFragmentShader(gridsize)); 470 } 471 472 inline ObjectData quadWith (const glu::ProgramSources& shader, float depth) 473 { 474 return ObjectData(shader, getFullscreenQuad(depth)); 475 } 476 477 inline ObjectData quadWith (const string& fragShader, float depth) 478 { 479 return ObjectData(glu::makeVtxFragSources(getBaseVertexShader(), fragShader), getFullscreenQuad(depth)); 480 } 481 482 inline ObjectData variableQuad (float depth) 483 { 484 return ObjectData(glu::makeVtxFragSources(getInstanceNoiseVertexShader(), getDepthAsRedFragmentShader()), getFullscreenQuad(depth)); 485 } 486 487 inline ObjectData fastQuad (float depth) 488 { 489 return ObjectData(getBaseShader(), getFullscreenQuad(depth)); 490 } 491 492 inline ObjectData slowQuad (float depth) 493 { 494 return ObjectData(getArithmeticWorkloadShader(), getFullscreenQuad(depth)); 495 } 496 497 inline ObjectData fastQuadWithGradient (float depth0, float depth1) 498 { 499 return ObjectData(getBaseShader(), getFullscreenQuadWithGradient(depth0, depth1)); 500 } 501} // Utils 502 503// Shared base 504class BaseCase : public tcu::TestCase 505{ 506public: 507 enum {RENDER_SIZE = 512}; 508 509 BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, 
const char* desc); 510 virtual ~BaseCase (void) {} 511 512 virtual IterateResult iterate (void); 513 514protected: 515 void logSamples (const vector<Sample>& samples, const string& name, const string& desc); 516 void logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg); 517 virtual void logAnalysis (const vector<Sample>& samples) = 0; 518 virtual void logDescription (void) = 0; 519 520 virtual ObjectData genOccluderGeometry (void) const = 0; 521 virtual ObjectData genOccludedGeometry (void) const = 0; 522 523 virtual int calibrate (void) const = 0; 524 virtual Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const = 0; 525 526 void render (const RenderData& data) const; 527 void render (const RenderData& data, int instances) const; 528 529 const RenderContext& m_renderCtx; 530 tcu::ResultCollector m_results; 531 532 enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16}; 533}; 534 535BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 536 : TestCase (testCtx, tcu::NODETYPE_PERFORMANCE, name, desc) 537 , m_renderCtx (renderCtx) 538{ 539} 540 541BaseCase::IterateResult BaseCase::iterate (void) 542{ 543 typedef de::MovePtr<RenderData> RenderDataP; 544 545 const glw::Functions& gl = m_renderCtx.getFunctions(); 546 TestLog& log = m_testCtx.getLog(); 547 548 const glu::Framebuffer framebuffer (gl); 549 const glu::Renderbuffer renderbuffer (gl); 550 const glu::Renderbuffer depthbuffer (gl); 551 552 vector<Sample> results; 553 vector<int> params; 554 RenderDataP occluderData; 555 RenderDataP occludedData; 556 tcu::TextureLevel resultTex (tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE); 557 int maxWorkload = 0; 558 de::Random rng (deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed()); 559 560 logDescription(); 561 
562 gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer); 563 gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE); 564 gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer); 565 gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE); 566 567 gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer); 568 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer); 569 gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer); 570 gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE); 571 gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f); 572 573 maxWorkload = calibrate(); 574 575 // Setup data 576 occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log)); 577 occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log)); 578 579 TCU_CHECK(occluderData->m_program.isOk()); 580 TCU_CHECK(occludedData->m_program.isOk()); 581 582 // Force initialization of GPU resources 583 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 584 gl.enable(GL_DEPTH_TEST); 585 586 render(*occluderData); 587 render(*occludedData); 588 glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess()); 589 590 logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program); 591 592 params.reserve(ITERATION_STEPS*ITERATION_SAMPLES); 593 594 // Setup parameters 595 for (int step = 0; step < ITERATION_STEPS; step++) 596 { 597 const int workload = maxWorkload*step/ITERATION_STEPS; 598 599 for (int count = 0; count < ITERATION_SAMPLES; count++) 600 params.push_back(workload); 601 } 602 603 rng.shuffle(params.begin(), params.end()); 604 605 // Render samples 606 for (size_t ndx = 0; ndx < params.size(); ndx++) 607 { 608 const int workload = params[ndx]; 609 Sample sample = renderSample(*occluderData, *occludedData, workload); 610 611 sample.workload = workload; 612 sample.order = int(ndx); 613 614 
results.push_back(sample); 615 } 616 617 logSamples(results, "Samples", "Samples"); 618 logAnalysis(results); 619 620 m_results.setTestContextResult(m_testCtx); 621 622 return STOP; 623} 624 625void BaseCase::logSamples (const vector<Sample>& samples, const string& name, const string& desc) 626{ 627 TestLog& log = m_testCtx.getLog(); 628 629 bool testOnly = true; 630 631 for (size_t ndx = 0; ndx < samples.size(); ndx++) 632 { 633 if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0) 634 { 635 testOnly = false; 636 break; 637 } 638 } 639 640 log << TestLog::SampleList(name, desc); 641 642 if (testOnly) 643 { 644 log << TestLog::SampleInfo 645 << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 646 << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 647 << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 648 << TestLog::EndSampleInfo; 649 650 for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) 651 { 652 const Sample& sample = samples[sampleNdx]; 653 654 log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample; 655 } 656 } 657 else 658 { 659 log << TestLog::SampleInfo 660 << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 661 << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) 662 << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 663 << TestLog::ValueInfo("NullTime", "Read pixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 664 << TestLog::ValueInfo("BaseTime", "Base render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) 665 << TestLog::EndSampleInfo; 666 667 for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) 668 { 669 const Sample& sample = samples[sampleNdx]; 670 671 log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << 
TestLog::EndSample; 672 } 673 } 674 675 log << TestLog::EndSampleList; 676} 677 678void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg) 679{ 680 TestLog& log = m_testCtx.getLog(); 681 682 log << TestLog::Section("Geometry", "Geometry"); 683 log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage; 684 log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage; 685 log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage; 686 687 log << TestLog::Image("Test Geometry", "Test Geometry", sample); 688 log << TestLog::EndSection; 689 690 log << TestLog::Section("Occluder", "Occluder"); 691 log << occluderProg; 692 log << TestLog::EndSection; 693 694 log << TestLog::Section("Occluded", "Occluded"); 695 log << occludedProg; 696 log << TestLog::EndSection; 697} 698 699void BaseCase::render (const RenderData& data) const 700{ 701 const glw::Functions& gl = m_renderCtx.getFunctions(); 702 703 gl.useProgram(data.m_program.getProgram()); 704 705 gl.bindVertexArray(*data.m_vao); 706 gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices); 707 gl.bindVertexArray(0); 708} 709 710void BaseCase::render (const RenderData& data, int instances) const 711{ 712 const glw::Functions& gl = m_renderCtx.getFunctions(); 713 714 gl.useProgram(data.m_program.getProgram()); 715 716 gl.bindVertexArray(*data.m_vao); 717 gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances); 718 gl.bindVertexArray(0); 719} 720 721// Render occluder once, then repeatedly render occluded geometry. 
Sample with multiple repetition counts & establish time per call with linear regression 722class RenderCountCase : public BaseCase 723{ 724public: 725 RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 726 ~RenderCountCase (void) {} 727 728protected: 729 virtual void logAnalysis (const vector<Sample>& samples); 730 731private: 732 virtual int calibrate (void) const; 733 virtual Sample renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const; 734}; 735 736RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 737 : BaseCase (testCtx, renderCtx, name, desc) 738{ 739} 740 741void RenderCountCase::logAnalysis (const vector<Sample>& samples) 742{ 743 using namespace gls; 744 745 TestLog& log = m_testCtx.getLog(); 746 int maxWorkload = 0; 747 vector<Vec2> testSamples (samples.size()); 748 749 for (size_t ndx = 0; ndx < samples.size(); ndx++) 750 { 751 const Sample& sample = samples[ndx]; 752 753 testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); 754 755 maxWorkload = de::max(maxWorkload, sample.workload); 756 } 757 758 { 759 const float confidence = 0.60f; 760 const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); 761 const float usPerCall = testParam.coefficient; 762 const float pxPerCall = RENDER_SIZE*RENDER_SIZE; 763 const float pxPerUs = pxPerCall/usPerCall; 764 const float mpxPerS = pxPerUs; 765 766 log << TestLog::Section("Linear Regression", "Linear Regression"); 767 log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. 
Reported confidence interval for this test is " << confidence << TestLog::EndMessage; 768 log << TestLog::Message << "Render time for scene with depth test was\n\t" 769 << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" 770 << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" 771 << "us/workload" << TestLog::EndMessage; 772 log << TestLog::EndSection; 773 774 log << TestLog::Section("Result", "Result"); 775 776 if (testParam.coefficientConfidenceLower < 0.0f) 777 { 778 log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage; 779 m_results.addResult(QP_TEST_RESULT_PASS, "Pass"); 780 } 781 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) 782 { 783 log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; 784 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 785 } 786 else 787 { 788 log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage; 789 m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2)); 790 } 791 792 log << TestLog::EndSection; 793 } 794} 795 796Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const 797{ 798 const glw::Functions& gl = m_renderCtx.getFunctions(); 799 Sample sample; 800 deUint64 now = 0; 801 deUint64 prev = 0; 802 deUint8 buffer[4]; 803 804 // Stabilize 805 { 806 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 807 gl.enable(GL_DEPTH_TEST); 808 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 809 } 810 811 prev = deGetMicroseconds(); 812 813 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | 
GL_STENCIL_BUFFER_BIT); 814 gl.enable(GL_DEPTH_TEST); 815 816 render(occluder); 817 render(occluded, callcount); 818 819 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 820 821 now = deGetMicroseconds(); 822 823 sample.testTime = now - prev; 824 sample.baseTime = 0; 825 sample.nullTime = 0; 826 sample.workload = callcount; 827 828 return sample; 829} 830 831int RenderCountCase::calibrate (void) const 832{ 833 using namespace gls; 834 835 const glw::Functions& gl = m_renderCtx.getFunctions(); 836 TestLog& log = m_testCtx.getLog(); 837 838 const RenderData occluderGeometry (genOccluderGeometry(), m_renderCtx, log); 839 const RenderData occludedGeometry (genOccludedGeometry(), m_renderCtx, log); 840 841 TheilSenCalibrator calibrator (CalibratorParameters(20, // Initial workload 842 10, // Max iteration frames 843 20.0f, // Iteration shortcut threshold ms 844 20, // Max iterations 845 33.0f, // Target frame time 846 40.0f, // Frame time cap 847 1000.0f // Target measurement duration 848 )); 849 850 while (true) 851 { 852 switch(calibrator.getState()) 853 { 854 case TheilSenCalibrator::STATE_FINISHED: 855 logCalibrationInfo(m_testCtx.getLog(), calibrator); 856 return calibrator.getCallCount(); 857 858 case TheilSenCalibrator::STATE_MEASURE: 859 { 860 deUint8 buffer[4]; 861 deInt64 now; 862 deInt64 prev; 863 864 prev = deGetMicroseconds(); 865 866 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 867 gl.disable(GL_DEPTH_TEST); 868 869 render(occluderGeometry); 870 render(occludedGeometry, calibrator.getCallCount()); 871 872 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 873 874 now = deGetMicroseconds(); 875 876 calibrator.recordIteration(now - prev); 877 break; 878 } 879 880 case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: 881 calibrator.recomputeParameters(); 882 break; 883 default: 884 DE_ASSERT(false); 885 return 1; 886 } 887 } 888} 889 890// Compares time/workload gradients of same geometry with and without 
depth testing 891class RelativeChangeCase : public BaseCase 892{ 893public: 894 RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 895 virtual ~RelativeChangeCase (void) {} 896 897protected: 898 Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const; 899 900 virtual void logAnalysis (const vector<Sample>& samples); 901 902private: 903 int calibrate (void) const; 904}; 905 906RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 907 : BaseCase (testCtx, renderCtx, name, desc) 908{ 909} 910 911int RelativeChangeCase::calibrate (void) const 912{ 913 using namespace gls; 914 915 const glw::Functions& gl = m_renderCtx.getFunctions(); 916 TestLog& log = m_testCtx.getLog(); 917 918 const RenderData geom (genOccludedGeometry(), m_renderCtx, log); 919 920 TheilSenCalibrator calibrator(CalibratorParameters( 20, // Initial workload 921 10, // Max iteration frames 922 20.0f, // Iteration shortcut threshold ms 923 20, // Max iterations 924 33.0f, // Target frame time 925 40.0f, // Frame time cap 926 1000.0f // Target measurement duration 927 )); 928 929 while (true) 930 { 931 switch(calibrator.getState()) 932 { 933 case TheilSenCalibrator::STATE_FINISHED: 934 logCalibrationInfo(m_testCtx.getLog(), calibrator); 935 return calibrator.getCallCount(); 936 937 case TheilSenCalibrator::STATE_MEASURE: 938 { 939 deUint8 buffer[4]; 940 const GLuint program = geom.m_program.getProgram(); 941 942 gl.useProgram(program); 943 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount()); 944 945 const deInt64 prev = deGetMicroseconds(); 946 947 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 948 gl.disable(GL_DEPTH_TEST); 949 950 render(geom); 951 952 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 953 954 const deInt64 now = deGetMicroseconds(); 
955 956 calibrator.recordIteration(now - prev); 957 break; 958 } 959 960 case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: 961 calibrator.recomputeParameters(); 962 break; 963 default: 964 DE_ASSERT(false); 965 return 1; 966 } 967 } 968} 969 970Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const 971{ 972 const glw::Functions& gl = m_renderCtx.getFunctions(); 973 const GLuint program = occluded.m_program.getProgram(); 974 Sample sample; 975 deUint64 now = 0; 976 deUint64 prev = 0; 977 deUint8 buffer[4]; 978 979 gl.useProgram(program); 980 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); 981 982 // Warmup (this workload seems to reduce variation in following workloads) 983 { 984 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 985 gl.disable(GL_DEPTH_TEST); 986 987 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 988 } 989 990 // Null time 991 { 992 prev = deGetMicroseconds(); 993 994 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 995 gl.disable(GL_DEPTH_TEST); 996 997 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 998 999 now = deGetMicroseconds(); 1000 1001 sample.nullTime = now - prev; 1002 } 1003 1004 // Test time 1005 { 1006 prev = deGetMicroseconds(); 1007 1008 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1009 gl.enable(GL_DEPTH_TEST); 1010 1011 render(occluder); 1012 render(occluded); 1013 1014 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1015 1016 now = deGetMicroseconds(); 1017 1018 sample.testTime = now - prev; 1019 } 1020 1021 // Base time 1022 { 1023 prev = deGetMicroseconds(); 1024 1025 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1026 gl.disable(GL_DEPTH_TEST); 1027 1028 render(occluder); 1029 render(occluded); 1030 1031 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1032 1033 now = 
deGetMicroseconds(); 1034 1035 sample.baseTime = now - prev; 1036 } 1037 1038 sample.workload = 0; 1039 1040 return sample; 1041} 1042 1043void RelativeChangeCase::logAnalysis (const vector<Sample>& samples) 1044{ 1045 using namespace gls; 1046 1047 TestLog& log = m_testCtx.getLog(); 1048 1049 int maxWorkload = 0; 1050 1051 vector<Vec2> nullSamples (samples.size()); 1052 vector<Vec2> baseSamples (samples.size()); 1053 vector<Vec2> testSamples (samples.size()); 1054 1055 for (size_t ndx = 0; ndx < samples.size(); ndx++) 1056 { 1057 const Sample& sample = samples[ndx]; 1058 1059 nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime); 1060 baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime); 1061 testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); 1062 1063 maxWorkload = de::max(maxWorkload, sample.workload); 1064 } 1065 1066 { 1067 const float confidence = 0.60f; 1068 1069 const LineParametersWithConfidence nullParam = theilSenSiegelLinearRegression(nullSamples, confidence); 1070 const LineParametersWithConfidence baseParam = theilSenSiegelLinearRegression(baseSamples, confidence); 1071 const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); 1072 1073 if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper)) 1074 { 1075 m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant"); 1076 log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. 
Result quality extremely low" << TestLog::EndMessage; 1077 } 1078 1079 if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper)) 1080 { 1081 m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration"); 1082 log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage; 1083 } 1084 1085 log << TestLog::Section("Linear Regression", "Linear Regression"); 1086 log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage; 1087 1088 log << TestLog::Message << "Render time for empty scene was\n\t" 1089 << "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset << ", " << nullParam.offsetConfidenceUpper << "]us +" 1090 << "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]" 1091 << "us/workload" << TestLog::EndMessage; 1092 1093 log << TestLog::Message << "Render time for scene without depth test was\n\t" 1094 << "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset << ", " << baseParam.offsetConfidenceUpper << "]us +" 1095 << "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]" 1096 << "us/workload" << TestLog::EndMessage; 1097 1098 log << TestLog::Message << "Render time for scene with depth test was\n\t" 1099 << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" 1100 << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" 1101 << "us/workload" << TestLog::EndMessage; 1102 1103 log << TestLog::EndSection; 1104 1105 if (de::inRange(0.0f, testParam.coefficientConfidenceLower, 
testParam.coefficientConfidenceUpper)) 1106 { 1107 log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage; 1108 m_results.addResult(QP_TEST_RESULT_PASS, "0.0"); 1109 } 1110 else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) 1111 { 1112 log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; 1113 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 1114 } 1115 else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25) 1116 { 1117 log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage; 1118 m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); 1119 } 1120 else 1121 { 1122 log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage; 1123 m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2)); 1124 } 1125 } 1126} 1127 1128// Speed of trivial culling 1129class BaseCostCase : public RenderCountCase 1130{ 1131public: 1132 BaseCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1133 : RenderCountCase (testCtx, renderCtx, name, desc) {} 1134 1135 ~BaseCostCase (void) {} 1136 1137private: 1138 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1139 virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } 1140 1141 virtual void logDescription (void) 1142 { 1143 TestLog& log = m_testCtx.getLog(); 1144 1145 log << TestLog::Section("Description", "Test description"); 1146 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1147 log << TestLog::Message << "Geometry consists of two fullsceen 
quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1148 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1149 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1150 log << TestLog::EndSection; 1151 } 1152}; 1153 1154// Gradient 1155class GradientCostCase : public RenderCountCase 1156{ 1157public: 1158 GradientCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance) 1159 : RenderCountCase (testCtx, renderCtx, name, desc) 1160 , m_gradientDistance (gradientDistance) 1161 { 1162 } 1163 1164 ~GradientCostCase (void) {} 1165 1166private: 1167 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); } 1168 virtual ObjectData genOccludedGeometry (void) const 1169 { 1170 return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f)); 1171 } 1172 1173 virtual void logDescription (void) 1174 { 1175 TestLog& log = m_testCtx.getLog(); 1176 1177 log << TestLog::Section("Description", "Test description"); 1178 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1179 log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1180 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1181 log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." 
<< TestLog::EndMessage; 1182 log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage; 1183 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1184 log << TestLog::EndSection; 1185 } 1186 1187 const float m_gradientDistance; 1188}; 1189 1190// Constant offset to frag depth in occluder 1191class OccluderStaticFragDepthCostCase : public RenderCountCase 1192{ 1193public: 1194 OccluderStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1195 : RenderCountCase(testCtx, renderCtx, name, desc) 1196 { 1197 } 1198 1199 ~OccluderStaticFragDepthCostCase (void) {} 1200 1201private: 1202 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1203 virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } 1204 1205 virtual void logDescription (void) 1206 { 1207 TestLog& log = m_testCtx.getLog(); 1208 1209 log << TestLog::Section("Description", "Test description"); 1210 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1211 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1212 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1213 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1214 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1215 log << TestLog::EndSection; 1216 } 1217}; 1218 1219// Dynamic offset to frag depth in occluder 1220class OccluderDynamicFragDepthCostCase : public RenderCountCase 1221{ 1222public: 1223 OccluderDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1224 : RenderCountCase(testCtx, renderCtx, name, desc) 1225 { 1226 } 1227 1228 ~OccluderDynamicFragDepthCostCase (void) {} 1229 1230private: 1231 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1232 virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } 1233 1234 virtual void logDescription (void) 1235 { 1236 TestLog& log = m_testCtx.getLog(); 1237 1238 log << TestLog::Section("Description", "Test description"); 1239 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1240 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1241 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1242 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1243 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1244 log << TestLog::EndSection; 1245 } 1246}; 1247 1248// Constant offset to frag depth in occluder 1249class OccludedStaticFragDepthCostCase : public RenderCountCase 1250{ 1251public: 1252 OccludedStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1253 : RenderCountCase(testCtx, renderCtx, name, desc) 1254 { 1255 } 1256 1257 ~OccludedStaticFragDepthCostCase (void) {} 1258 1259private: 1260 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1261 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1262 1263 virtual void logDescription (void) 1264 { 1265 TestLog& log = m_testCtx.getLog(); 1266 1267 log << TestLog::Section("Description", "Test description"); 1268 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1269 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1270 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1271 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1272 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1273 log << TestLog::EndSection; 1274 } 1275}; 1276 1277// Dynamic offset to frag depth in occluder 1278class OccludedDynamicFragDepthCostCase : public RenderCountCase 1279{ 1280public: 1281 OccludedDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1282 : RenderCountCase(testCtx, renderCtx, name, desc) 1283 { 1284 } 1285 1286 ~OccludedDynamicFragDepthCostCase (void) {} 1287 1288private: 1289 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1290 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1291 1292 virtual void logDescription (void) 1293 { 1294 TestLog& log = m_testCtx.getLog(); 1295 1296 log << TestLog::Section("Description", "Test description"); 1297 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1298 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; 1299 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1300 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1301 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1302 log << TestLog::EndSection; 1303 } 1304}; 1305 1306// Culling speed with slightly less trivial geometry 1307class OccludingGeometryComplexityCostCase : public RenderCountCase 1308{ 1309public: 1310 OccludingGeometryComplexityCostCase (TestContext& testCtx, 1311 const RenderContext& renderCtx, 1312 const char* name, 1313 const char* desc, 1314 int resolution, 1315 float xyNoise, 1316 float zNoise) 1317 : RenderCountCase (testCtx, renderCtx, name, desc) 1318 , m_resolution (resolution) 1319 , m_xyNoise (xyNoise) 1320 , m_zNoise (zNoise) 1321 { 1322 } 1323 1324 ~OccludingGeometryComplexityCostCase (void) {} 1325 1326private: 1327 virtual ObjectData genOccluderGeometry (void) const 1328 { 1329 return ObjectData(Utils::getBaseShader(), 1330 Utils::getFullScreenGrid(m_resolution, 1331 deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(), 1332 0.2f, 1333 m_zNoise, 1334 m_xyNoise)); 1335 } 1336 1337 virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } 1338 1339 virtual void logDescription (void) 1340 { 1341 TestLog& log = m_testCtx.getLog(); 1342 1343 log << TestLog::Section("Description", "Test description"); 1344 log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; 1345 log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. 
The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage; 1346 log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; 1347 log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; 1348 log << TestLog::EndSection; 1349 } 1350 1351 const int m_resolution; 1352 const float m_xyNoise; 1353 const float m_zNoise; 1354}; 1355 1356 1357// Cases with varying workloads in the fragment shader 1358class FragmentWorkloadCullCase : public RelativeChangeCase 1359{ 1360public: 1361 FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 1362 virtual ~FragmentWorkloadCullCase (void) {} 1363 1364private: 1365 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1366 1367 virtual void logDescription (void); 1368}; 1369 1370FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1371 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1372{ 1373} 1374 1375void FragmentWorkloadCullCase::logDescription (void) 1376{ 1377 TestLog& log = m_testCtx.getLog(); 1378 1379 log << TestLog::Section("Description", "Test description"); 1380 log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; 1381 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad uses a trivial shader," 1382 "the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1383 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1384 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1385 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1386 log << TestLog::EndSection; 1387} 1388 1389// Additional workload consists of texture lookups 1390class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase 1391{ 1392public: 1393 FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); 1394 virtual ~FragmentTextureWorkloadCullCase (void) {} 1395 1396 virtual void init (void); 1397 virtual void deinit (void); 1398 1399private: 1400 typedef MovePtr<glu::Texture> TexPtr; 1401 1402 virtual ObjectData genOccludedGeometry (void) const 1403 { 1404 return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f)); 1405 } 1406 1407 TexPtr m_texture; 1408}; 1409 1410FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1411 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1412{ 1413} 1414 1415void FragmentTextureWorkloadCullCase::init (void) 1416{ 1417 const glw::Functions& gl = m_renderCtx.getFunctions(); 1418 const int size = 128; 1419 const vector<deUint8> data (size*size*4, 255); 1420 1421 m_texture = MovePtr<glu::Texture>(new glu::Texture(gl)); 1422 1423 gl.bindTexture(GL_TEXTURE_2D, m_texture); 1424 gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]); 1425 
gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 1426 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1427} 1428 1429void FragmentTextureWorkloadCullCase::deinit (void) 1430{ 1431 m_texture.clear(); 1432} 1433 1434// Additional workload consists of arithmetic 1435class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase 1436{ 1437public: 1438 FragmentArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1439 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1440 { 1441 } 1442 virtual ~FragmentArithmeticWorkloadCullCase (void) {} 1443 1444private: 1445 virtual ObjectData genOccludedGeometry (void) const 1446 { 1447 return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f)); 1448 } 1449}; 1450 1451// Contains dynamicly unused discard after a series of calculations 1452class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase 1453{ 1454public: 1455 FragmentDiscardArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1456 : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) 1457 { 1458 } 1459 1460 virtual ~FragmentDiscardArithmeticWorkloadCullCase (void) {} 1461 1462private: 1463 virtual ObjectData genOccludedGeometry (void) const 1464 { 1465 return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f)); 1466 } 1467 1468 virtual void logDescription (void) 1469 { 1470 TestLog& log = m_testCtx.getLog(); 1471 1472 log << TestLog::Section("Description", "Test description"); 1473 log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; 1474 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad uses a trivial shader," 1475 "the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage; 1476 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1477 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1478 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1479 log << TestLog::EndSection; 1480 } 1481}; 1482 1483// Discards fragments from the occluder in a grid pattern 1484class PartialOccluderDiscardCullCase : public RelativeChangeCase 1485{ 1486public: 1487 PartialOccluderDiscardCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize) 1488 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1489 , m_gridsize (gridsize) 1490 { 1491 } 1492 virtual ~PartialOccluderDiscardCullCase (void) {} 1493 1494private: 1495 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); } 1496 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1497 1498 virtual void logDescription (void) 1499 { 1500 TestLog& log = m_testCtx.getLog(); 1501 1502 log << TestLog::Section("Description", "Test description"); 1503 log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage; 1504 log << TestLog::Message << "Geometry consists of two fullsceen quads. 
The first (occluding) quad discards half the " 1505 "fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage; 1506 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1507 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1508 log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time" << TestLog::EndMessage; 1509 log << TestLog::EndSection; 1510 } 1511 1512 const int m_gridsize; 1513}; 1514 1515// Trivial occluder covering part of screen 1516class PartialOccluderCullCase : public RelativeChangeCase 1517{ 1518public: 1519 PartialOccluderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage) 1520 : RelativeChangeCase (testCtx, renderCtx, name, desc) 1521 , m_coverage (coverage) 1522 { 1523 } 1524 ~PartialOccluderCullCase (void) {} 1525 1526private: 1527 virtual ObjectData genOccluderGeometry (void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); } 1528 virtual ObjectData genOccludedGeometry (void) const {return Utils::slowQuad(0.8f); } 1529 1530 virtual void logDescription (void) 1531 { 1532 TestLog& log = m_testCtx.getLog(); 1533 1534 log << TestLog::Section("Description", "Test description"); 1535 log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage; 1536 log << TestLog::Message << "Geometry consists of two quads. 
The first (occluding) quad covers " << m_coverage*100.0f 1537 << "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage; 1538 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1539 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1540 log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area" << TestLog::EndMessage; 1541 log << TestLog::EndSection; 1542 } 1543 1544 const float m_coverage; 1545}; 1546 1547// Constant offset to frag depth in occluder 1548class StaticOccluderFragDepthCullCase : public RelativeChangeCase 1549{ 1550public: 1551 StaticOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1552 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1553 { 1554 } 1555 1556 ~StaticOccluderFragDepthCullCase (void) {} 1557 1558private: 1559 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } 1560 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1561 1562 virtual void logDescription (void) 1563 { 1564 TestLog& log = m_testCtx.getLog(); 1565 1566 log << TestLog::Section("Description", "Test description"); 1567 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; 1568 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1569 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1570 log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1571 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1572 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1573 log << TestLog::EndSection; 1574 } 1575}; 1576 1577// Dynamic offset to frag depth in occluder 1578class DynamicOccluderFragDepthCullCase : public RelativeChangeCase 1579{ 1580public: 1581 DynamicOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1582 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1583 { 1584 } 1585 1586 ~DynamicOccluderFragDepthCullCase (void) {} 1587 1588private: 1589 virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } 1590 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1591 1592 virtual void logDescription (void) 1593 { 1594 TestLog& log = m_testCtx.getLog(); 1595 1596 log << TestLog::Section("Description", "Test description"); 1597 log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; 1598 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1599 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1600 log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1601 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1602 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1603 log << TestLog::EndSection; 1604 } 1605}; 1606 1607// Constant offset to frag depth in occluded 1608class StaticOccludedFragDepthCullCase : public RelativeChangeCase 1609{ 1610public: 1611 StaticOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1612 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1613 { 1614 } 1615 1616 ~StaticOccludedFragDepthCullCase (void) {} 1617 1618private: 1619 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1620 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } 1621 1622 virtual void logDescription (void) 1623 { 1624 TestLog& log = m_testCtx.getLog(); 1625 1626 log << TestLog::Section("Description", "Test description"); 1627 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; 1628 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1629 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1630 log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; 1631 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1632 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1633 log << TestLog::EndSection; 1634 } 1635}; 1636 1637// Dynamic offset to frag depth in occluded 1638class DynamicOccludedFragDepthCullCase : public RelativeChangeCase 1639{ 1640public: 1641 DynamicOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1642 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1643 { 1644 } 1645 1646 ~DynamicOccludedFragDepthCullCase (void) {} 1647 1648private: 1649 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1650 virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } 1651 1652 virtual void logDescription (void) 1653 { 1654 TestLog& log = m_testCtx.getLog(); 1655 1656 log << TestLog::Section("Description", "Test description"); 1657 log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; 1658 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1659 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1660 log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; 1661 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1662 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1663 log << TestLog::EndSection; 1664 } 1665}; 1666 1667// Dynamic offset to frag depth in occluded 1668class ReversedDepthOrderCullCase : public RelativeChangeCase 1669{ 1670public: 1671 ReversedDepthOrderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) 1672 : RelativeChangeCase(testCtx, renderCtx, name, desc) 1673 { 1674 } 1675 1676 ~ReversedDepthOrderCullCase (void) {} 1677 1678private: 1679 virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } 1680 virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } 1681 1682 virtual void logDescription (void) 1683 { 1684 TestLog& log = m_testCtx.getLog(); 1685 1686 log << TestLog::Section("Description", "Test description"); 1687 log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage; 1688 log << TestLog::Message << "Geometry consists of two fullscreen quads. 
The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage; 1689 log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; 1690 log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; 1691 log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; 1692 log << TestLog::EndSection; 1693 } 1694 1695 // Rendering order of occluder & occluded is reversed, otherwise identical to parent version 1696 Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const 1697 { 1698 const glw::Functions& gl = m_renderCtx.getFunctions(); 1699 const GLuint program = occluded.m_program.getProgram(); 1700 Sample sample; 1701 deUint64 now = 0; 1702 deUint64 prev = 0; 1703 deUint8 buffer[4]; 1704 1705 gl.useProgram(program); 1706 gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); 1707 1708 // Warmup (this workload seems to reduce variation in following workloads) 1709 { 1710 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1711 gl.disable(GL_DEPTH_TEST); 1712 1713 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1714 } 1715 1716 // Null time 1717 { 1718 prev = deGetMicroseconds(); 1719 1720 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1721 gl.disable(GL_DEPTH_TEST); 1722 1723 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1724 1725 now = deGetMicroseconds(); 1726 1727 sample.nullTime = now - prev; 1728 } 1729 1730 // Test time 1731 { 1732 prev = deGetMicroseconds(); 1733 1734 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1735 gl.enable(GL_DEPTH_TEST); 1736 1737 render(occluded); 1738 
render(occluder); 1739 1740 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1741 1742 now = deGetMicroseconds(); 1743 1744 sample.testTime = now - prev; 1745 } 1746 1747 // Base time 1748 { 1749 prev = deGetMicroseconds(); 1750 1751 gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); 1752 gl.disable(GL_DEPTH_TEST); 1753 1754 render(occluded); 1755 render(occluder); 1756 1757 gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); 1758 1759 now = deGetMicroseconds(); 1760 1761 sample.baseTime = now - prev; 1762 } 1763 1764 sample.workload = 0; 1765 1766 return sample; 1767 } 1768}; 1769 1770} // Anonymous 1771 1772DepthTests::DepthTests (Context& context) 1773 : TestCaseGroup (context, "depth", "Depth culling performance") 1774{ 1775} 1776 1777void DepthTests::init (void) 1778{ 1779 TestContext& testCtx = m_context.getTestContext(); 1780 const RenderContext& renderCtx = m_context.getRenderContext(); 1781 1782 { 1783 tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency"); 1784 1785 addChild(cullEfficiencyGroup); 1786 1787 { 1788 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload"); 1789 1790 cullEfficiencyGroup->addChild(group); 1791 1792 group->addChild(new FragmentTextureWorkloadCullCase( testCtx, renderCtx, "workload_texture", "Fragment shader with texture lookup workload")); 1793 group->addChild(new FragmentArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic", "Fragment shader with arithmetic workload")); 1794 group->addChild(new FragmentDiscardArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic_discard", "Fragment shader that may discard with arithmetic workload")); 1795 } 1796 1797 { 1798 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard"); 1799 1800 cullEfficiencyGroup->addChild(group); 1801 1802 group->addChild(new 
PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256", "Parts of occluder geometry discarded", 256)); 1803 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128", "Parts of occluder geometry discarded", 128)); 1804 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64", "Parts of occluder geometry discarded", 64)); 1805 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32", "Parts of occluder geometry discarded", 32)); 1806 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16", "Parts of occluder geometry discarded", 16)); 1807 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8", "Parts of occluder geometry discarded", 8)); 1808 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4", "Parts of occluder geometry discarded", 4)); 1809 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2", "Parts of occluder geometry discarded", 2)); 1810 group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1", "Parts of occluder geometry discarded", 1)); 1811 } 1812 1813 { 1814 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage"); 1815 1816 cullEfficiencyGroup->addChild(group); 1817 1818 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f)); 1819 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f)); 1820 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f)); 1821 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f)); 1822 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering 
only part of occluded geometry", 0.80f)); 1823 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f)); 1824 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f)); 1825 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f)); 1826 group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f)); 1827 } 1828 1829 { 1830 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage"); 1831 1832 cullEfficiencyGroup->addChild(group); 1833 1834 group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", "")); 1835 group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", "")); 1836 group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", "")); 1837 group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", "")); 1838 } 1839 1840 { 1841 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order"); 1842 1843 cullEfficiencyGroup->addChild(group); 1844 1845 group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order")); 1846 } 1847 } 1848 1849 { 1850 tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency"); 1851 1852 addChild(testCostGroup); 1853 1854 { 1855 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences"); 1856 1857 testCostGroup->addChild(group); 1858 1859 group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", "")); 1860 group->addChild(new GradientCostCase(testCtx, renderCtx, 
"gradient_050", "", 0.50f)); 1861 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f)); 1862 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f)); 1863 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f)); 1864 group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f)); 1865 } 1866 1867 { 1868 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity"); 1869 1870 testCostGroup->addChild(group); 1871 1872 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5", "", 5, 0.0f, 0.0f)); 1873 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15", "", 15, 0.0f, 0.0f)); 1874 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25", "", 25, 0.0f, 0.0f)); 1875 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50", "", 50, 0.0f, 0.0f)); 1876 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f)); 1877 1878 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5", "", 5, 1.0f/5.0f, 0.0f)); 1879 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15", "", 15, 1.0f/15.0f, 0.0f)); 1880 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25", "", 25, 1.0f/25.0f, 0.0f)); 1881 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50", "", 50, 1.0f/50.0f, 0.0f)); 1882 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f)); 1883 1884 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, 
"uneven_uniform_grid_5", "", 5, 0.0f, 0.2f)); 1885 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15", "", 15, 0.0f, 0.2f)); 1886 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25", "", 25, 0.0f, 0.2f)); 1887 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50", "", 50, 0.0f, 0.2f)); 1888 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f)); 1889 1890 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5", "", 5, 1.0f/5.0f, 0.2f)); 1891 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15", "", 15, 1.0f/15.0f, 0.2f)); 1892 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25", "", 25, 1.0f/25.0f, 0.2f)); 1893 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50", "", 50, 1.0f/50.0f, 0.2f)); 1894 group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f)); 1895 } 1896 1897 { 1898 tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth"); 1899 1900 testCostGroup->addChild(group); 1901 1902 group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", "")); 1903 group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", "")); 1904 group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", "")); 1905 group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", "")); 1906 } 1907 } 1908} 1909 1910} // Performance 1911} // gles3 1912} // deqp 1913