es31fBasicComputeShaderTests.cpp revision 8852c82a1ffa4760985c17cc6875d5d521daf343
1/*------------------------------------------------------------------------- 2 * drawElements Quality Program OpenGL ES 3.1 Module 3 * ------------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Basic Compute Shader Tests. 22 *//*--------------------------------------------------------------------*/ 23 24#include "es31fBasicComputeShaderTests.hpp" 25#include "gluShaderProgram.hpp" 26#include "gluObjectWrapper.hpp" 27#include "gluRenderContext.hpp" 28#include "gluProgramInterfaceQuery.hpp" 29#include "gluContextInfo.hpp" 30#include "glwFunctions.hpp" 31#include "glwEnums.hpp" 32#include "tcuTestLog.hpp" 33#include "deRandom.hpp" 34#include "deStringUtil.hpp" 35#include "deMemory.h" 36 37namespace deqp 38{ 39namespace gles31 40{ 41namespace Functional 42{ 43 44using std::string; 45using std::vector; 46using tcu::TestLog; 47using namespace glu; 48 49//! Utility for mapping buffers. 
50class BufferMemMap 51{ 52public: 53 BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access) 54 : m_gl (gl) 55 , m_target (target) 56 , m_ptr (DE_NULL) 57 { 58 m_ptr = gl.mapBufferRange(target, offset, size, access); 59 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); 60 TCU_CHECK(m_ptr); 61 } 62 63 ~BufferMemMap (void) 64 { 65 m_gl.unmapBuffer(m_target); 66 } 67 68 void* getPtr (void) const { return m_ptr; } 69 void* operator* (void) const { return m_ptr; } 70 71private: 72 BufferMemMap (const BufferMemMap& other); 73 BufferMemMap& operator= (const BufferMemMap& other); 74 75 const glw::Functions& m_gl; 76 const deUint32 m_target; 77 void* m_ptr; 78}; 79 80namespace 81{ 82 83class EmptyComputeShaderCase : public TestCase 84{ 85public: 86 EmptyComputeShaderCase (Context& context) 87 : TestCase(context, "empty", "Empty shader") 88 { 89 } 90 91 IterateResult iterate (void) 92 { 93 const ShaderProgram program(m_context.getRenderContext(), 94 ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, 95 "#version 310 es\n" 96 "layout (local_size_x = 1) in;\n" 97 "void main (void) {}\n" 98 )); 99 100 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 101 102 m_testCtx.getLog() << program; 103 if (!program.isOk()) 104 TCU_FAIL("Compile failed"); 105 106 gl.useProgram(program.getProgram()); 107 gl.dispatchCompute(1, 1, 1); 108 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 109 110 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 111 return STOP; 112 } 113}; 114 115class UBOToSSBOInvertCase : public TestCase 116{ 117public: 118 UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 119 : TestCase (context, name, description) 120 , m_numValues (numValues) 121 , m_localSize (localSize) 122 , m_workSize (workSize) 123 { 124 DE_ASSERT(m_numValues % 
(m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 125 } 126 127 IterateResult iterate (void) 128 { 129 std::ostringstream src; 130 src << "#version 310 es\n" 131 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 132 << "uniform Input {\n" 133 << " uint values[" << m_numValues << "];\n" 134 << "} ub_in;\n" 135 << "layout(binding = 1) buffer Output {\n" 136 << " uint values[" << m_numValues << "];\n" 137 << "} sb_out;\n" 138 << "void main (void) {\n" 139 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 140 << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n" 141 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 142 << " uint offset = numValuesPerInv*groupNdx;\n" 143 << "\n" 144 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 145 << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n" 146 << "}\n"; 147 148 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 149 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 150 const Buffer inputBuffer (m_context.getRenderContext()); 151 const Buffer outputBuffer (m_context.getRenderContext()); 152 std::vector<deUint32> inputValues (m_numValues); 153 154 // Compute input values. 
155 { 156 de::Random rnd(0x111223f); 157 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 158 inputValues[ndx] = rnd.getUint32(); 159 } 160 161 m_testCtx.getLog() << program; 162 if (!program.isOk()) 163 TCU_FAIL("Compile failed"); 164 165 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 166 167 gl.useProgram(program.getProgram()); 168 169 // Input buffer setup 170 { 171 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input"); 172 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex); 173 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values"); 174 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex); 175 176 gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer); 177 gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); 178 179 { 180 const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 181 182 for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++) 183 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 184 } 185 186 gl.uniformBlockBinding(program.getProgram(), blockIndex, 0); 187 gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer); 188 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 189 } 190 191 // Output buffer setup 192 { 193 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 194 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 195 196 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 197 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, 
DE_NULL, GL_STREAM_READ); 198 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer); 199 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 200 } 201 202 // Dispatch compute workload 203 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 204 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 205 206 // Read back and compare 207 { 208 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 209 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 210 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 211 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 212 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 213 214 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 215 for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) 216 { 217 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 218 const deUint32 ref = ~inputValues[ndx]; 219 220 if (res != ref) 221 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); 222 } 223 } 224 225 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 226 return STOP; 227 } 228 229private: 230 const int m_numValues; 231 const tcu::IVec3 m_localSize; 232 const tcu::IVec3 m_workSize; 233}; 234 235class CopyInvertSSBOCase : public TestCase 236{ 237public: 238 CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 239 : TestCase (context, name, description) 240 , m_numValues (numValues) 241 , m_localSize (localSize) 242 , m_workSize (workSize) 243 { 244 
DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 245 } 246 247 IterateResult iterate (void) 248 { 249 std::ostringstream src; 250 src << "#version 310 es\n" 251 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 252 << "layout(binding = 0) buffer Input {\n" 253 << " uint values[" << m_numValues << "];\n" 254 << "} sb_in;\n" 255 << "layout (binding = 1) buffer Output {\n" 256 << " uint values[" << m_numValues << "];\n" 257 << "} sb_out;\n" 258 << "void main (void) {\n" 259 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 260 << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n" 261 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 262 << " uint offset = numValuesPerInv*groupNdx;\n" 263 << "\n" 264 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 265 << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n" 266 << "}\n"; 267 268 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 269 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 270 const Buffer inputBuffer (m_context.getRenderContext()); 271 const Buffer outputBuffer (m_context.getRenderContext()); 272 std::vector<deUint32> inputValues (m_numValues); 273 274 // Compute input values. 
275 { 276 de::Random rnd(0x124fef); 277 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 278 inputValues[ndx] = rnd.getUint32(); 279 } 280 281 m_testCtx.getLog() << program; 282 if (!program.isOk()) 283 TCU_FAIL("Compile failed"); 284 285 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 286 287 gl.useProgram(program.getProgram()); 288 289 // Input buffer setup 290 { 291 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); 292 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 293 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); 294 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 295 296 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); 297 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); 298 299 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 300 301 { 302 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 303 304 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 305 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 306 } 307 308 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); 309 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 310 } 311 312 // Output buffer setup 313 { 314 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 315 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 316 317 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 318 
gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ); 319 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer); 320 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 321 } 322 323 // Dispatch compute workload 324 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 325 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 326 327 // Read back and compare 328 { 329 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 330 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 331 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 332 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 333 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 334 335 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 336 for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) 337 { 338 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 339 const deUint32 ref = ~inputValues[ndx]; 340 341 if (res != ref) 342 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); 343 } 344 } 345 346 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 347 return STOP; 348 } 349 350private: 351 const int m_numValues; 352 const tcu::IVec3 m_localSize; 353 const tcu::IVec3 m_workSize; 354}; 355 356class InvertSSBOInPlaceCase : public TestCase 357{ 358public: 359 InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 360 : TestCase (context, name, description) 361 , 
m_numValues (numValues) 362 , m_isSized (isSized) 363 , m_localSize (localSize) 364 , m_workSize (workSize) 365 { 366 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 367 } 368 369 IterateResult iterate (void) 370 { 371 std::ostringstream src; 372 src << "#version 310 es\n" 373 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 374 << "layout(binding = 0) buffer InOut {\n" 375 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 376 << "} sb_inout;\n" 377 << "void main (void) {\n" 378 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 379 << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n" 380 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 381 << " uint offset = numValuesPerInv*groupNdx;\n" 382 << "\n" 383 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 384 << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n" 385 << "}\n"; 386 387 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 388 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 389 390 m_testCtx.getLog() << program; 391 if (!program.isOk()) 392 TCU_FAIL("Compile failed"); 393 394 const Buffer outputBuffer (m_context.getRenderContext()); 395 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values"); 396 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 397 const deUint32 blockSize = valueInfo.arrayStride*(deUint32)m_numValues; 398 std::vector<deUint32> inputValues (m_numValues); 399 400 // Compute input values. 
401 { 402 de::Random rnd(0x82ce7f); 403 for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) 404 inputValues[ndx] = rnd.getUint32(); 405 } 406 407 TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); 408 409 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 410 411 gl.useProgram(program.getProgram()); 412 413 // Output buffer setup 414 { 415 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 416 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW); 417 418 { 419 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT); 420 421 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 422 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 423 } 424 425 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 426 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 427 } 428 429 // Dispatch compute workload 430 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 431 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 432 433 // Read back and compare 434 { 435 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 436 437 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 438 { 439 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 440 const deUint32 ref = ~inputValues[ndx]; 441 442 if (res != ref) 443 throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]"); 444 } 445 } 446 447 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 448 return STOP; 449 } 450 451private: 452 const int m_numValues; 453 const bool m_isSized; 454 const tcu::IVec3 m_localSize; 455 const tcu::IVec3 m_workSize; 456}; 457 458class WriteToMultipleSSBOCase : public TestCase 459{ 460public: 461 
WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 462 : TestCase (context, name, description) 463 , m_numValues (numValues) 464 , m_isSized (isSized) 465 , m_localSize (localSize) 466 , m_workSize (workSize) 467 { 468 DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); 469 } 470 471 IterateResult iterate (void) 472 { 473 std::ostringstream src; 474 src << "#version 310 es\n" 475 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 476 << "layout(binding = 0) buffer Out0 {\n" 477 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 478 << "} sb_out0;\n" 479 << "layout(binding = 1) buffer Out1 {\n" 480 << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" 481 << "} sb_out1;\n" 482 << "void main (void) {\n" 483 << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" 484 << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" 485 << "\n" 486 << " {\n" 487 << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n" 488 << " uint offset = numValuesPerInv*groupNdx;\n" 489 << "\n" 490 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 491 << " sb_out0.values[offset + ndx] = offset + ndx;\n" 492 << " }\n" 493 << " {\n" 494 << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n" 495 << " uint offset = numValuesPerInv*groupNdx;\n" 496 << "\n" 497 << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" 498 << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n" 499 << " }\n" 500 << "}\n"; 501 502 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 503 const 
ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 504 505 m_testCtx.getLog() << program; 506 if (!program.isOk()) 507 TCU_FAIL("Compile failed"); 508 509 const Buffer outputBuffer0 (m_context.getRenderContext()); 510 const deUint32 value0Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values"); 511 const InterfaceVariableInfo value0Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index); 512 const deUint32 block0Size = value0Info.arrayStride*(deUint32)m_numValues; 513 514 const Buffer outputBuffer1 (m_context.getRenderContext()); 515 const deUint32 value1Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values"); 516 const InterfaceVariableInfo value1Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index); 517 const deUint32 block1Size = value1Info.arrayStride*(deUint32)m_numValues; 518 519 TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); 520 TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? 
m_numValues : 0)); 521 522 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 523 524 gl.useProgram(program.getProgram()); 525 526 // Output buffer setup 527 { 528 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); 529 gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW); 530 531 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0); 532 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 533 } 534 { 535 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); 536 gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW); 537 538 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1); 539 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); 540 } 541 542 // Dispatch compute workload 543 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 544 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 545 546 // Read back and compare 547 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); 548 { 549 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT); 550 551 for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) 552 { 553 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx)); 554 const deUint32 ref = ndx; 555 556 if (res != ref) 557 throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); 558 } 559 } 560 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); 561 { 562 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT); 563 564 for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) 565 { 566 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx)); 567 const deUint32 ref = m_numValues - ndx; 568 569 if (res != ref) 570 
throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); 571 } 572 } 573 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 574 return STOP; 575 } 576 577private: 578 const int m_numValues; 579 const bool m_isSized; 580 const tcu::IVec3 m_localSize; 581 const tcu::IVec3 m_workSize; 582}; 583 584class SSBOLocalBarrierCase : public TestCase 585{ 586public: 587 SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 588 : TestCase (context, name, description) 589 , m_localSize (localSize) 590 , m_workSize (workSize) 591 { 592 } 593 594 IterateResult iterate (void) 595 { 596 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 597 const Buffer outputBuffer (m_context.getRenderContext()); 598 const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; 599 const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; 600 const int numValues = workGroupSize*workGroupCount; 601 602 std::ostringstream src; 603 src << "#version 310 es\n" 604 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 605 << "layout(binding = 0) buffer Output {\n" 606 << " coherent uint values[" << numValues << "];\n" 607 << "} sb_out;\n\n" 608 << "shared uint offsets[" << workGroupSize << "];\n\n" 609 << "void main (void) {\n" 610 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" 611 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 612 << " uint globalOffs = localSize*globalNdx;\n" 613 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" 614 << "\n" 615 << " 
sb_out.values[globalOffs + localOffs] = globalOffs;\n" 616 << " memoryBarrierBuffer();\n" 617 << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" 618 << " memoryBarrierBuffer();\n" 619 << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n" 620 << "}\n"; 621 622 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); 623 624 m_testCtx.getLog() << program; 625 if (!program.isOk()) 626 TCU_FAIL("Compile failed"); 627 628 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 629 630 gl.useProgram(program.getProgram()); 631 632 // Output buffer setup 633 { 634 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 635 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 636 637 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 638 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 639 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 640 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 641 } 642 643 // Dispatch compute workload 644 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 645 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 646 647 // Read back and compare 648 { 649 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 650 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 651 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 652 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 653 const BufferMemMap bufMap (gl, 
GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 654 655 for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) 656 { 657 for (int localOffs = 0; localOffs < workGroupSize; localOffs++) 658 { 659 const int globalOffs = groupNdx*workGroupSize; 660 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs))); 661 const int offs0 = localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize); 662 const int offs1 = localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize); 663 const deUint32 ref = (deUint32)(globalOffs + offs0 + offs1); 664 665 if (res != ref) 666 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); 667 } 668 } 669 } 670 671 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 672 return STOP; 673 } 674 675private: 676 const tcu::IVec3 m_localSize; 677 const tcu::IVec3 m_workSize; 678}; 679 680class SSBOBarrierCase : public TestCase 681{ 682public: 683 SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize) 684 : TestCase (context, name, description) 685 , m_workSize (workSize) 686 { 687 } 688 689 IterateResult iterate (void) 690 { 691 const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() << 692 ComputeSource("#version 310 es\n" 693 "layout (local_size_x = 1) in;\n" 694 "uniform uint u_baseVal;\n" 695 "layout(binding = 1) buffer Output {\n" 696 " uint values[];\n" 697 "};\n" 698 "void main (void) {\n" 699 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 700 " values[offset] = u_baseVal+offset;\n" 701 "}\n")); 702 const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << 703 ComputeSource("#version 310 es\n" 704 "layout (local_size_x = 1) in;\n" 705 
"uniform uint u_baseVal;\n" 706 "layout(binding = 1) buffer Input {\n" 707 " uint values[];\n" 708 "};\n" 709 "layout(binding = 0) buffer Output {\n" 710 " coherent uint sum;\n" 711 "};\n" 712 "void main (void) {\n" 713 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 714 " uint value = values[offset];\n" 715 " atomicAdd(sum, value);\n" 716 "}\n")); 717 718 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 719 const Buffer tempBuffer (m_context.getRenderContext()); 720 const Buffer outputBuffer (m_context.getRenderContext()); 721 const deUint32 baseValue = 127; 722 723 m_testCtx.getLog() << program0 << program1; 724 if (!program0.isOk() || !program1.isOk()) 725 TCU_FAIL("Compile failed"); 726 727 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 728 729 // Temp buffer setup 730 { 731 const deUint32 valueIndex = gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]"); 732 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 733 const deUint32 bufferSize = valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2]; 734 735 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer); 736 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW); 737 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer); 738 GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed"); 739 } 740 741 // Output buffer setup 742 { 743 const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 744 const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 745 746 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 747 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, 
DE_NULL, GL_STREAM_READ); 748 749 { 750 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT); 751 deMemset(bufMap.getPtr(), 0, blockSize); 752 } 753 754 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 755 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 756 } 757 758 // Dispatch compute workload 759 gl.useProgram(program0.getProgram()); 760 gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue); 761 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 762 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 763 gl.useProgram(program1.getProgram()); 764 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 765 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); 766 767 // Read back and compare 768 { 769 const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 770 const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 771 const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); 772 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 773 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 774 775 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset)); 776 deUint32 ref = 0; 777 778 for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++) 779 ref += baseValue + (deUint32)ndx; 780 781 if (res != ref) 782 { 783 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; 784 throw tcu::TestError("Comparison failed"); 785 } 786 } 787 788 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 789 return STOP; 790 } 791 792private: 
793 const tcu::IVec3 m_workSize; 794}; 795

//! Shared variable test.
//!
//! Each invocation writes globalOffs + localOffs*localOffs into the mirrored
//! slot (localSize-localOffs-1) of a shared array, synchronizes with the rest
//! of its work group and then copies slot localOffs to the output SSBO. The
//! host verifies that every output element holds the value produced by the
//! mirrored invocation of the same work group.
class BasicSharedVarCase : public TestCase
{
public:
	//! \param localSize	Work group size written into the shader as local_size_x/y/z.
	//! \param workSize		Number of work groups dispatched in each dimension.
	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
		: TestCase		(context, name, description)
		, m_localSize	(localSize)
		, m_workSize	(workSize)
	{
	}

	//! Builds the program, dispatches it once and compares the SSBO contents.
	//! Throws tcu::TestError on compile failure, GL error or value mismatch.
	IterateResult iterate (void)
	{
		const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
		const Buffer			outputBuffer	(m_context.getRenderContext());
		const int				workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
		const int				workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
		const int				numValues		= workGroupSize*workGroupCount;

		std::ostringstream src;
		src << "#version 310 es\n"
			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
			<< "layout(binding = 0) buffer Output {\n"
			<< " uint values[" << numValues << "];\n"
			<< "} sb_out;\n\n"
			<< "shared uint offsets[" << workGroupSize << "];\n\n"
			<< "void main (void) {\n"
			<< " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
			<< " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< " uint globalOffs = localSize*globalNdx;\n"
			<< " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
			<< "\n"
			<< " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
			// barrier() only synchronizes control flow; the memory barrier makes the
			// shared-array writes visible to the other invocations of the group.
			<< " memoryBarrierShared();\n"
			<< " barrier();\n"
			<< " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
			<< "}\n";

		const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));

		m_testCtx.getLog() << program;
		if (!program.isOk())
			TCU_FAIL("Compile failed");

		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

		gl.useProgram(program.getProgram());

		// Output buffer setup
		{
			const deUint32	blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int		blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
		}

		// Dispatch compute workload
		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

		// Shader writes are only guaranteed to be reflected in mapped buffer
		// memory after a buffer update barrier.
		gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier()");

		// Read back and compare
		{
			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
			{
				const int globalOffs = groupNdx*workGroupSize;

				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
				{
					// Slot localOffs of the shared array was filled by the invocation
					// whose local index is workGroupSize-localOffs-1.
					const int		writerNdx	= workGroupSize-localOffs-1;
					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
					const deUint32	ref			= (deUint32)(globalOffs + writerNdx*writerNdx);

					if (res != ref)
						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
				}
			}
		}

		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
		return STOP;
	}

private:
	const tcu::IVec3	m_localSize;
	const tcu::IVec3	m_workSize;
};

888class SharedVarAtomicOpCase : public TestCase 889{ 890public: 891 SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 892 : TestCase (context, name, description) 893 , m_localSize (localSize) 894 , m_workSize (workSize) 895 { 896 } 897 898 IterateResult iterate (void) 899 { 900 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 901 const Buffer outputBuffer (m_context.getRenderContext()); 902 const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; 903 const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; 904 const int numValues = workGroupSize*workGroupCount; 905 906 std::ostringstream src; 907 src << "#version 310 es\n" 908 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 909 << "layout(binding = 0) buffer Output {\n" 910 << " uint values[" << numValues << "];\n" 911 << "} sb_out;\n\n" 912 << "shared uint count;\n\n" 913 << "void main (void) {\n" 914 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" 915 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 916 << " uint globalOffs = localSize*globalNdx;\n" 917 << "\n" 918 << " count = 0u;\n" 919 << " barrier();\n" 920 << " uint oldVal = atomicAdd(count, 1u);\n" 921 << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n" 922 << "}\n"; 923 924 const ShaderProgram program 
(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 925 926 m_testCtx.getLog() << program; 927 if (!program.isOk()) 928 TCU_FAIL("Compile failed"); 929 930 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 931 932 gl.useProgram(program.getProgram()); 933 934 // Output buffer setup 935 { 936 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 937 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 938 939 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 940 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 941 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 942 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 943 } 944 945 // Dispatch compute workload 946 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 947 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 948 949 // Read back and compare 950 { 951 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 952 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 953 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 954 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 955 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 956 957 for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) 958 { 959 for (int localOffs = 0; localOffs < workGroupSize; localOffs++) 960 { 961 const int globalOffs = groupNdx*workGroupSize; 962 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() 
+ valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs))); 963 const deUint32 ref = (deUint32)(localOffs+1); 964 965 if (res != ref) 966 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); 967 } 968 } 969 } 970 971 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 972 return STOP; 973 } 974 975private: 976 const tcu::IVec3 m_localSize; 977 const tcu::IVec3 m_workSize; 978}; 979 980class CopyImageToSSBOCase : public TestCase 981{ 982public: 983 CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize) 984 : TestCase (context, name, description) 985 , m_localSize (localSize) 986 , m_imageSize (imageSize) 987 { 988 DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); 989 DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); 990 } 991 992 IterateResult iterate (void) 993 { 994 995 std::ostringstream src; 996 src << "#version 310 es\n" 997 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" 998 << "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n" 999 << "layout(binding = 0) buffer Output {\n" 1000 << " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n" 1001 << "} sb_out;\n\n" 1002 << "void main (void) {\n" 1003 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" 1004 << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n" 1005 << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n" 1006 << "}\n"; 1007 1008 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1009 const Buffer outputBuffer (m_context.getRenderContext()); 1010 const Texture inputTexture (m_context.getRenderContext()); 1011 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 1012 const tcu::IVec2 workSize = m_imageSize / 
m_localSize; 1013 de::Random rnd (0xab2c7); 1014 vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]); 1015 1016 m_testCtx.getLog() << program; 1017 if (!program.isOk()) 1018 TCU_FAIL("Compile failed"); 1019 1020 m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; 1021 1022 gl.useProgram(program.getProgram()); 1023 1024 // Input values 1025 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) 1026 *i = rnd.getUint32(); 1027 1028 // Input image setup 1029 gl.bindTexture(GL_TEXTURE_2D, *inputTexture); 1030 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); 1031 gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]); 1032 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 1033 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1034 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); 1035 1036 // Bind to unit 1 1037 gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI); 1038 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); 1039 1040 // Output buffer setup 1041 { 1042 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1043 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1044 1045 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 1046 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 1047 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 1048 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 1049 } 1050 1051 // Dispatch compute workload 1052 gl.dispatchCompute(workSize[0], workSize[1], 1); 1053 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1054 1055 // Read back and compare 1056 { 1057 const deUint32 
blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1058 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1059 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 1060 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1061 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 1062 1063 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 1064 1065 for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) 1066 { 1067 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); 1068 const deUint32 ref = inputValues[ndx]; 1069 1070 if (res != ref) 1071 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); 1072 } 1073 } 1074 1075 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1076 return STOP; 1077 } 1078 1079private: 1080 const tcu::IVec2 m_localSize; 1081 const tcu::IVec2 m_imageSize; 1082}; 1083 1084class CopySSBOToImageCase : public TestCase 1085{ 1086public: 1087 CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize) 1088 : TestCase (context, name, description) 1089 , m_localSize (localSize) 1090 , m_imageSize (imageSize) 1091 { 1092 DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); 1093 DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); 1094 } 1095 1096 IterateResult iterate (void) 1097 { 1098 1099 std::ostringstream src; 1100 src << "#version 310 es\n" 1101 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" 1102 << "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n" 1103 << "buffer Input {\n" 
1104 << " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n" 1105 << "} sb_in;\n\n" 1106 << "void main (void) {\n" 1107 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" 1108 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" 1109 << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n" 1110 << "}\n"; 1111 1112 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1113 const Buffer inputBuffer (m_context.getRenderContext()); 1114 const Texture outputTexture (m_context.getRenderContext()); 1115 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 1116 const tcu::IVec2 workSize = m_imageSize / m_localSize; 1117 de::Random rnd (0x77238ac2); 1118 vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]); 1119 1120 m_testCtx.getLog() << program; 1121 if (!program.isOk()) 1122 TCU_FAIL("Compile failed"); 1123 1124 m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; 1125 1126 gl.useProgram(program.getProgram()); 1127 1128 // Input values 1129 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) 1130 *i = rnd.getUint32(); 1131 1132 // Input buffer setup 1133 { 1134 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); 1135 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 1136 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); 1137 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1138 1139 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); 1140 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, 
GL_STATIC_DRAW); 1141 1142 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 1143 1144 { 1145 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 1146 1147 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 1148 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 1149 } 1150 1151 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); 1152 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 1153 } 1154 1155 // Output image setup 1156 gl.bindTexture(GL_TEXTURE_2D, *outputTexture); 1157 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); 1158 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 1159 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1160 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); 1161 1162 // Bind to unit 1 1163 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI); 1164 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); 1165 1166 // Dispatch compute workload 1167 gl.dispatchCompute(workSize[0], workSize[1], 1); 1168 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1169 1170 // Read back and compare 1171 { 1172 Framebuffer fbo (m_context.getRenderContext()); 1173 vector<deUint32> pixels (inputValues.size()*4); 1174 1175 gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); 1176 gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); 1177 TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); 1178 1179 // \note In ES3 we have to use GL_RGBA_INTEGER 1180 gl.readBuffer(GL_COLOR_ATTACHMENT0); 1181 gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); 1182 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); 1183 1184 for (deUint32 ndx = 0; ndx < 
(deUint32)inputValues.size(); ndx++) 1185 { 1186 const deUint32 res = pixels[ndx*4]; 1187 const deUint32 ref = inputValues[ndx]; 1188 1189 if (res != ref) 1190 throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx)); 1191 } 1192 } 1193 1194 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1195 return STOP; 1196 } 1197 1198private: 1199 const tcu::IVec2 m_localSize; 1200 const tcu::IVec2 m_imageSize; 1201}; 1202 1203class ImageAtomicOpCase : public TestCase 1204{ 1205public: 1206 ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize) 1207 : TestCase (context, name, description) 1208 , m_localSize (localSize) 1209 , m_imageSize (imageSize) 1210 { 1211 } 1212 1213 void init (void) 1214 { 1215 if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic")) 1216 throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension"); 1217 } 1218 1219 IterateResult iterate (void) 1220 { 1221 1222 std::ostringstream src; 1223 src << "#version 310 es\n" 1224 << "#extension GL_OES_shader_image_atomic : require\n" 1225 << "layout (local_size_x = " << m_localSize << ") in;\n" 1226 << "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n" 1227 << "buffer Input {\n" 1228 << " uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n" 1229 << "} sb_in;\n\n" 1230 << "void main (void) {\n" 1231 << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" 1232 << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" 1233 << "\n" 1234 << " if (gl_LocalInvocationIndex == 0u)\n" 1235 << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n" 1236 << " barrier();\n" 1237 << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n" 1238 << "}\n"; 1239 1240 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1241 const Buffer inputBuffer (m_context.getRenderContext()); 1242 
const Texture outputTexture (m_context.getRenderContext()); 1243 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); 1244 de::Random rnd (0x77238ac2); 1245 vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]*m_localSize); 1246 1247 m_testCtx.getLog() << program; 1248 if (!program.isOk()) 1249 TCU_FAIL("Compile failed"); 1250 1251 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage; 1252 1253 gl.useProgram(program.getProgram()); 1254 1255 // Input values 1256 for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) 1257 *i = rnd.getUint32(); 1258 1259 // Input buffer setup 1260 { 1261 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); 1262 const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); 1263 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); 1264 const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1265 1266 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); 1267 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); 1268 1269 TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); 1270 1271 { 1272 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); 1273 1274 for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) 1275 *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; 1276 } 1277 1278 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); 1279 GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); 1280 } 1281 1282 // Output 
image setup 1283 gl.bindTexture(GL_TEXTURE_2D, *outputTexture); 1284 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); 1285 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 1286 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1287 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); 1288 1289 // Bind to unit 1 1290 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); 1291 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); 1292 1293 // Dispatch compute workload 1294 gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1); 1295 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1296 1297 // Read back and compare 1298 { 1299 Framebuffer fbo (m_context.getRenderContext()); 1300 vector<deUint32> pixels (m_imageSize[0]*m_imageSize[1]*4); 1301 1302 gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); 1303 gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); 1304 TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); 1305 1306 // \note In ES3 we have to use GL_RGBA_INTEGER 1307 gl.readBuffer(GL_COLOR_ATTACHMENT0); 1308 gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); 1309 GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); 1310 1311 for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++) 1312 { 1313 const deUint32 res = pixels[pixelNdx*4]; 1314 deUint32 ref = 0; 1315 1316 for (int offs = 0; offs < m_localSize; offs++) 1317 ref += inputValues[pixelNdx*m_localSize + offs]; 1318 1319 if (res != ref) 1320 throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx)); 1321 } 1322 } 1323 1324 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1325 return STOP; 1326 } 1327 1328private: 1329 const int m_localSize; 1330 const tcu::IVec2 m_imageSize; 1331}; 1332 1333class ImageBarrierCase : 
public TestCase 1334{ 1335public: 1336 ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize) 1337 : TestCase (context, name, description) 1338 , m_workSize (workSize) 1339 { 1340 } 1341 1342 IterateResult iterate (void) 1343 { 1344 const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() << 1345 ComputeSource("#version 310 es\n" 1346 "layout (local_size_x = 1) in;\n" 1347 "uniform uint u_baseVal;\n" 1348 "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n" 1349 "void main (void) {\n" 1350 " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 1351 " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n" 1352 "}\n")); 1353 const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << 1354 ComputeSource("#version 310 es\n" 1355 "layout (local_size_x = 1) in;\n" 1356 "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n" 1357 "layout(binding = 0) buffer Output {\n" 1358 " coherent uint sum;\n" 1359 "};\n" 1360 "void main (void) {\n" 1361 " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n" 1362 " atomicAdd(sum, value);\n" 1363 "}\n")); 1364 1365 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1366 const Texture tempTexture (m_context.getRenderContext()); 1367 const Buffer outputBuffer (m_context.getRenderContext()); 1368 const deUint32 baseValue = 127; 1369 1370 m_testCtx.getLog() << program0 << program1; 1371 if (!program0.isOk() || !program1.isOk()) 1372 TCU_FAIL("Compile failed"); 1373 1374 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 1375 1376 // Temp texture setup 1377 gl.bindTexture(GL_TEXTURE_2D, *tempTexture); 1378 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]); 1379 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, 
GL_NEAREST); 1380 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 1381 GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); 1382 1383 // Bind to unit 2 1384 gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); 1385 GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); 1386 1387 // Output buffer setup 1388 { 1389 const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1390 const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1391 1392 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 1393 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 1394 1395 { 1396 const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT); 1397 deMemset(bufMap.getPtr(), 0, blockSize); 1398 } 1399 1400 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 1401 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 1402 } 1403 1404 // Dispatch compute workload 1405 gl.useProgram(program0.getProgram()); 1406 gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue); 1407 gl.dispatchCompute(m_workSize[0], m_workSize[1], 1); 1408 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); 1409 gl.useProgram(program1.getProgram()); 1410 gl.dispatchCompute(m_workSize[0], m_workSize[1], 1); 1411 GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); 1412 1413 // Read back and compare 1414 { 1415 const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1416 const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1417 const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); 1418 const InterfaceVariableInfo valueInfo = 
getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1419 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 1420 1421 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset)); 1422 deUint32 ref = 0; 1423 1424 for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++) 1425 ref += baseValue + (deUint32)ndx; 1426 1427 if (res != ref) 1428 { 1429 m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; 1430 throw tcu::TestError("Comparison failed"); 1431 } 1432 } 1433 1434 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1435 return STOP; 1436 } 1437 1438private: 1439 const tcu::IVec2 m_workSize; 1440}; 1441 1442class AtomicCounterCase : public TestCase 1443{ 1444public: 1445 AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) 1446 : TestCase (context, name, description) 1447 , m_localSize (localSize) 1448 , m_workSize (workSize) 1449 { 1450 } 1451 1452 IterateResult iterate (void) 1453 { 1454 const glw::Functions& gl = m_context.getRenderContext().getFunctions(); 1455 const Buffer outputBuffer (m_context.getRenderContext()); 1456 const Buffer counterBuffer (m_context.getRenderContext()); 1457 const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; 1458 const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; 1459 const int numValues = workGroupSize*workGroupCount; 1460 1461 std::ostringstream src; 1462 src << "#version 310 es\n" 1463 << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" 1464 << "layout(binding = 0) buffer Output {\n" 1465 << " uint values[" << numValues << "];\n" 1466 << "} sb_out;\n\n" 1467 << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n" 
1468 << "void main (void) {\n" 1469 << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" 1470 << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" 1471 << " uint globalOffs = localSize*globalNdx;\n" 1472 << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" 1473 << "\n" 1474 << " uint oldVal = atomicCounterIncrement(u_count);\n" 1475 << " sb_out.values[globalOffs+localOffs] = oldVal;\n" 1476 << "}\n"; 1477 1478 const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); 1479 1480 m_testCtx.getLog() << program; 1481 if (!program.isOk()) 1482 TCU_FAIL("Compile failed"); 1483 1484 m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; 1485 1486 gl.useProgram(program.getProgram()); 1487 1488 // Atomic counter buffer setup 1489 { 1490 const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); 1491 const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); 1492 const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); 1493 1494 gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer); 1495 gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ); 1496 1497 { 1498 const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT); 1499 deMemset(memMap.getPtr(), 0, (int)bufferSize); 1500 } 1501 1502 gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer); 1503 GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed"); 1504 } 1505 1506 // Output buffer setup 1507 { 1508 const deUint32 blockIndex = 
gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1509 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1510 1511 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); 1512 gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); 1513 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); 1514 GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); 1515 } 1516 1517 // Dispatch compute workload 1518 gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); 1519 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); 1520 1521 // Read back and compare atomic counter 1522 { 1523 const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); 1524 const deUint32 uniformOffset = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET); 1525 const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); 1526 const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); 1527 const BufferMemMap bufMap (gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT); 1528 1529 const deUint32 resVal = *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset)); 1530 1531 if (resVal != (deUint32)numValues) 1532 throw tcu::TestError("Invalid atomic counter value"); 1533 } 1534 1535 // Read back and compare SSBO 1536 { 1537 const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); 1538 const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); 1539 const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); 1540 const 
InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); 1541 const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 1542 deUint32 valSum = 0; 1543 deUint32 refSum = 0; 1544 1545 for (int valNdx = 0; valNdx < numValues; valNdx++) 1546 { 1547 const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx)); 1548 1549 valSum += res; 1550 refSum += (deUint32)valNdx; 1551 1552 if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues)) 1553 throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]"); 1554 } 1555 1556 if (valSum != refSum) 1557 throw tcu::TestError("Total sum of values in Output.values doesn't match"); 1558 } 1559 1560 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); 1561 return STOP; 1562 } 1563 1564private: 1565 const tcu::IVec3 m_localSize; 1566 const tcu::IVec3 m_workSize; 1567}; 1568 1569} // anonymous 1570 1571BasicComputeShaderTests::BasicComputeShaderTests (Context& context) 1572 : TestCaseGroup(context, "basic", "Basic Compute Shader Tests") 1573{ 1574} 1575 1576BasicComputeShaderTests::~BasicComputeShaderTests (void) 1577{ 1578} 1579 1580void BasicComputeShaderTests::init (void) 1581{ 1582 addChild(new EmptyComputeShaderCase(m_context)); 1583 1584 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1585 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1))); 1586 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); 1587 addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, 
inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1588 1589 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1590 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); 1591 addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1592 1593 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1594 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1595 1596 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1597 addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1598 1599 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1600 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1601 1602 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1603 addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); 1604 1605 addChild(new SSBOLocalBarrierCase (m_context, 
"ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1606 addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1607 addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1608 1609 addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1))); 1610 addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7))); 1611 1612 addChild(new BasicSharedVarCase (m_context, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1613 addChild(new BasicSharedVarCase (m_context, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1614 addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1615 addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1616 1617 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1618 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1619 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1620 addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1621 1622 addChild(new CopyImageToSSBOCase 
(m_context, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); 1623 addChild(new CopyImageToSSBOCase (m_context, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); 1624 1625 addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); 1626 addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); 1627 1628 addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64))); 1629 addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64))); 1630 1631 addChild(new ImageBarrierCase (m_context, "image_barrier_single", "Image barrier", tcu::IVec2(1,1))); 1632 addChild(new ImageBarrierCase (m_context, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64))); 1633 1634 addChild(new AtomicCounterCase (m_context, "atomic_counter_single_invocation", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); 1635 addChild(new AtomicCounterCase (m_context, "atomic_counter_single_group", "Basic atomic counter test", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); 1636 addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_invocations", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); 1637 addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_groups", "Basic atomic counter test", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); 1638} 1639 1640} // Functional 1641} // gles31 1642} // deqp 1643