/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.1 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect compute dispatch tests.
 *//*--------------------------------------------------------------------*/

#include "es31fIndirectComputeDispatchTests.hpp"
#include "gluObjectWrapper.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "tcuVector.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuTestLog.hpp"
#include "deStringUtil.hpp"

#include <map>
#include <sstream>
#include <string>
#include <vector>

namespace deqp
{
namespace gles31
{
namespace Functional
{

using tcu::UVec3;
using tcu::TestLog;
using std::vector;
using std::string;
using std::map;

// \todo [2014-02-17 pyry] Should be extended with following:

// Negative:
//  - no active shader program
//  - indirect negative or not aligned
//  - indirect + size outside buffer bounds
//  - no buffer bound to DRAW_INDIRECT_BUFFER
//  - (implicit) buffer mapped

// Robustness:
//  - lot of small work group launches
//  - very large work group size
//  - no synchronization, touched by gpu
//  - compute program overwriting buffer

67namespace 68{ 69 70enum 71{ 72 RESULT_BLOCK_BASE_SIZE = (3+1)*(int)sizeof(deUint32), // uvec3 + uint 73 RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0, 74 RESULT_BLOCK_NUM_PASSED_OFFSET = 3*(int)sizeof(deUint32), 75 76 INDIRECT_COMMAND_SIZE = 3*(int)sizeof(deUint32) 77}; 78 79enum GenBuffer 80{ 81 GEN_BUFFER_UPLOAD = 0, 82 GEN_BUFFER_COMPUTE, 83 84 GEN_BUFFER_LAST 85}; 86 87glu::ProgramSources genVerifySources (const UVec3& workGroupSize) 88{ 89 static const char* s_verifyDispatchTmpl = 90 "#version 310 es\n" 91 "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n" 92 "layout(binding = 0, std430) buffer Result\n" 93 "{\n" 94 " uvec3 expectedGroupCount;\n" 95 " coherent uint numPassed;\n" 96 "} result;\n" 97 "void main (void)\n" 98 "{\n" 99 " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n" 100 " atomicAdd(result.numPassed, 1u);\n" 101 "}\n"; 102 103 map<string, string> args; 104 105 args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x()); 106 args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y()); 107 args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z()); 108 109 return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args)); 110} 111 112class IndirectDispatchCase : public TestCase 113{ 114public: 115 IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer); 116 ~IndirectDispatchCase (void); 117 118 IterateResult iterate (void); 119 120protected: 121 struct DispatchCommand 122 { 123 deIntptr offset; 124 UVec3 numWorkGroups; 125 126 DispatchCommand (void) : offset(0) {} 127 DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {} 128 }; 129 130 GenBuffer m_genBuffer; 131 deUintptr m_bufferSize; 132 UVec3 m_workGroupSize; 133 vector<DispatchCommand> m_commands; 134 135 void createCommandBuffer (deUint32 buffer) const; 136 void createResultBuffer 
(deUint32 buffer) const; 137 138 bool verifyResultBuffer (deUint32 buffer); 139 140 void createCmdBufferUpload (deUint32 buffer) const; 141 void createCmdBufferCompute (deUint32 buffer) const; 142 143private: 144 IndirectDispatchCase (const IndirectDispatchCase&); 145 IndirectDispatchCase& operator= (const IndirectDispatchCase&); 146}; 147 148IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer) 149 : TestCase (context, name, description) 150 , m_genBuffer (genBuffer) 151 , m_bufferSize (0) 152{ 153} 154 155IndirectDispatchCase::~IndirectDispatchCase (void) 156{ 157} 158 159static int getResultBlockAlignedSize (const glw::Functions& gl) 160{ 161 const int baseSize = RESULT_BLOCK_BASE_SIZE; 162 int alignment = 0; 163 gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment); 164 165 if (alignment == 0 || (baseSize % alignment == 0)) 166 return baseSize; 167 else 168 return (baseSize/alignment + 1)*alignment; 169} 170 171void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const 172{ 173 switch (m_genBuffer) 174 { 175 case GEN_BUFFER_UPLOAD: createCmdBufferUpload (buffer); break; 176 case GEN_BUFFER_COMPUTE: createCmdBufferCompute (buffer); break; 177 default: 178 DE_ASSERT(false); 179 } 180} 181 182void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const 183{ 184 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 185 vector<deUint8> data (m_bufferSize); 186 187 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 188 { 189 DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3); 190 DE_ASSERT(cmdIter->offset >= 0); 191 DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0); 192 DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize); 193 194 deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset]; 195 196 dstPtr[0] = cmdIter->numWorkGroups[0]; 197 
dstPtr[1] = cmdIter->numWorkGroups[1]; 198 dstPtr[2] = cmdIter->numWorkGroups[2]; 199 } 200 201 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); 202 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW); 203} 204 205void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const 206{ 207 std::ostringstream src; 208 209 // Header 210 src << 211 "#version 310 es\n" 212 "layout(local_size_x = 1) in;\n" 213 "layout(std430, binding = 1) buffer Out\n" 214 "{\n" 215 " highp uint data[];\n" 216 "};\n" 217 "void writeCmd (uint offset, uvec3 numWorkGroups)\n" 218 "{\n" 219 " data[offset+0u] = numWorkGroups.x;\n" 220 " data[offset+1u] = numWorkGroups.y;\n" 221 " data[offset+2u] = numWorkGroups.z;\n" 222 "}\n" 223 "void main (void)\n" 224 "{\n"; 225 226 // Commands 227 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 228 { 229 const deUint32 offs = (deUint32)(cmdIter->offset/4); 230 DE_ASSERT((deIntptr)offs*4 == cmdIter->offset); 231 232 src << "\twriteCmd(" << offs << "u, uvec3(" 233 << cmdIter->numWorkGroups.x() << "u, " 234 << cmdIter->numWorkGroups.y() << "u, " 235 << cmdIter->numWorkGroups.z() << "u));\n"; 236 } 237 238 src << "}\n"; 239 240 { 241 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 242 glu::ShaderProgram program (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str())); 243 244 m_testCtx.getLog() << program; 245 if (!program.isOk()) 246 TCU_FAIL("Compile failed"); 247 248 gl.useProgram(program.getProgram()); 249 250 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer); 251 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW); 252 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer); 253 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 254 255 gl.dispatchCompute(1,1,1); 256 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() 
failed"); 257 258 gl.memoryBarrier(GL_COMMAND_BARRIER_BIT); 259 GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed"); 260 } 261} 262 263void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const 264{ 265 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 266 const int resultBlockSize = getResultBlockAlignedSize(gl); 267 const int resultBufferSize = resultBlockSize*(int)m_commands.size(); 268 vector<deUint8> data (resultBufferSize); 269 270 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 271 { 272 deUint8* const dstPtr = &data[resultBlockSize*cmdNdx]; 273 274 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0]; 275 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1]; 276 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2]; 277 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0; 278 } 279 280 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 281 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ); 282} 283 284deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups) 285{ 286 const int numInvocationsPerGroup = workGroupSize[0]*workGroupSize[1]*workGroupSize[2]; 287 const int numGroups = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2]; 288 289 return numInvocationsPerGroup*numGroups; 290} 291 292bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer) 293{ 294 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 295 296 const int resultBlockSize = getResultBlockAlignedSize(gl); 297 const int resultBufferSize = resultBlockSize*(int)m_commands.size(); 298 299 void* mapPtr = DE_NULL; 300 bool allOk = true; 301 302 try 303 { 304 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer); 305 mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 
resultBufferSize, GL_MAP_READ_BIT); 306 307 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed"); 308 TCU_CHECK(mapPtr); 309 310 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 311 { 312 const DispatchCommand& cmd = m_commands[cmdNdx]; 313 const deUint8* const srcPtr = (const deUint8*)mapPtr + cmdNdx*resultBlockSize; 314 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET); 315 const deUint32 expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups); 316 317 // Verify numPassed. 318 if (numPassed != expectedCount) 319 { 320 m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx 321 << ": got numPassed = " << numPassed << ", expected " << expectedCount 322 << TestLog::EndMessage; 323 allOk = false; 324 } 325 } 326 } 327 catch (...) 328 { 329 if (mapPtr) 330 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 331 } 332 333 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER); 334 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed"); 335 336 return allOk; 337} 338 339IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void) 340{ 341 const glu::RenderContext& renderCtx = m_context.getRenderContext(); 342 const glw::Functions& gl = renderCtx.getFunctions(); 343 344 const glu::ShaderProgram program (renderCtx, genVerifySources(m_workGroupSize)); 345 346 glu::Buffer cmdBuffer (renderCtx); 347 glu::Buffer resultBuffer (renderCtx); 348 349 m_testCtx.getLog() << program; 350 TCU_CHECK_MSG(program.isOk(), "Compile failed"); 351 352 m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage; 353 { 354 tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)"); 355 356 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++) 357 m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << 
m_commands[cmdNdx].offset 358 << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups 359 << TestLog::EndMessage; 360 } 361 362 createResultBuffer(*resultBuffer); 363 createCommandBuffer(*cmdBuffer); 364 365 gl.useProgram(program.getProgram()); 366 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer); 367 GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed"); 368 369 { 370 const int resultBlockAlignedSize = getResultBlockAlignedSize(gl); 371 deIntptr curOffset = 0; 372 373 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter) 374 { 375 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize); 376 gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset); 377 378 curOffset += resultBlockAlignedSize; 379 } 380 } 381 382 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed"); 383 384 if (verifyResultBuffer(*resultBuffer)) 385 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 386 else 387 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer"); 388 389 return STOP; 390} 391 392class SingleDispatchCase : public IndirectDispatchCase 393{ 394public: 395 SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups) 396 : IndirectDispatchCase(context, name, description, genBuffer) 397 { 398 m_bufferSize = bufferSize; 399 m_workGroupSize = workGroupSize; 400 m_commands.push_back(DispatchCommand(offset, numWorkGroups)); 401 } 402}; 403 404class MultiDispatchCase : public IndirectDispatchCase 405{ 406public: 407 MultiDispatchCase (Context& context, GenBuffer genBuffer) 408 : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer) 409 { 410 m_bufferSize = 1<<10; 411 m_workGroupSize = UVec3(3,1,2); 412 413 
m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 414 m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE, UVec3(2,1,1))); 415 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 416 m_commands.push_back(DispatchCommand(40, UVec3(1,1,7))); 417 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 418 } 419}; 420 421class MultiDispatchReuseCommandCase : public IndirectDispatchCase 422{ 423public: 424 MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer) 425 : IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer) 426 { 427 m_bufferSize = 1<<10; 428 m_workGroupSize = UVec3(3,1,2); 429 430 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 431 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 432 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1))); 433 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 434 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1))); 435 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 436 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4))); 437 } 438}; 439 440} // anonymous 441 442IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context) 443 : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests") 444{ 445} 446 447IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void) 448{ 449} 450 451void IndirectComputeDispatchTests::init (void) 452{ 453 static const struct 454 { 455 const char* name; 456 GenBuffer gen; 457 } s_genBuffer[] = 458 { 459 { "upload_buffer", GEN_BUFFER_UPLOAD }, 460 { "gen_in_compute", GEN_BUFFER_COMPUTE } 461 }; 462 463 static const struct 464 { 465 const char* name; 466 const char* description; 467 deUintptr bufferSize; 468 deUintptr offset; 469 UVec3 workGroupSize; 470 UVec3 numWorkGroups; 471 } s_singleDispatchCases[] = 472 { 473 // Name Desc BufferSize Offs WorkGroupSize NumWorkGroups 474 { "single_invocation", "Single 
invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(1,1,1) }, 475 { "multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(2,3,5) }, 476 { "multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(2,3,1), UVec3(1,2,3) }, 477 { "small_offset", "Small offset", 16+INDIRECT_COMMAND_SIZE, 16, UVec3(1,1,1), UVec3(1,1,1) }, 478 { "large_offset", "Large offset", (2<<20), (1<<20) + 12, UVec3(1,1,1), UVec3(1,1,1) }, 479 { "large_offset_multiple_invocations", "Large offset, multiple invocations", (2<<20), (1<<20) + 12, UVec3(2,3,1), UVec3(1,2,3) }, 480 { "empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(0,0,0) }, 481 }; 482 483 for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++) 484 { 485 const GenBuffer genBuf = s_genBuffer[genNdx].gen; 486 tcu::TestCaseGroup* const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, ""); 487 addChild(genGroup); 488 489 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++) 490 genGroup->addChild(new SingleDispatchCase(m_context, 491 s_singleDispatchCases[ndx].name, 492 s_singleDispatchCases[ndx].description, 493 genBuf, 494 s_singleDispatchCases[ndx].bufferSize, 495 s_singleDispatchCases[ndx].offset, 496 s_singleDispatchCases[ndx].workGroupSize, 497 s_singleDispatchCases[ndx].numWorkGroups)); 498 499 genGroup->addChild(new MultiDispatchCase (m_context, genBuf)); 500 genGroup->addChild(new MultiDispatchReuseCommandCase (m_context, genBuf)); 501 } 502} 503 504} // Functional 505} // gles31 506} // deqp 507