1/*------------------------------------------------------------------------ 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2016 The Khronos Group Inc. 6 * Copyright (c) 2016 The Android Open Source Project 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 *//*! 21 * \file 22 * \brief Indirect Compute Dispatch tests 23 *//*--------------------------------------------------------------------*/ 24 25#include "vktComputeIndirectComputeDispatchTests.hpp" 26#include "vktComputeTestsUtil.hpp" 27 28#include <string> 29#include <map> 30#include <vector> 31 32#include "vkDefs.hpp" 33#include "vkRef.hpp" 34#include "vkRefUtil.hpp" 35#include "vktTestCase.hpp" 36#include "vktTestCaseUtil.hpp" 37#include "vkPlatform.hpp" 38#include "vkPrograms.hpp" 39#include "vkMemUtil.hpp" 40#include "vkBuilderUtil.hpp" 41#include "vkQueryUtil.hpp" 42 43#include "tcuVector.hpp" 44#include "tcuVectorUtil.hpp" 45#include "tcuTestLog.hpp" 46#include "tcuRGBA.hpp" 47#include "tcuStringTemplate.hpp" 48 49#include "deUniquePtr.hpp" 50#include "deSharedPtr.hpp" 51#include "deStringUtil.hpp" 52#include "deArrayUtil.hpp" 53 54#include "gluShaderUtil.hpp" 55 56namespace vkt 57{ 58namespace compute 59{ 60namespace 61{ 62 63enum 64{ 65 RESULT_BLOCK_BASE_SIZE = 4 * (int)sizeof(deUint32), // uvec3 + uint 66 RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32), 67 INDIRECT_COMMAND_OFFSET = 3 * (int)sizeof(deUint32), 68}; 69 70vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface& instance_interface, 71 const vk::VkPhysicalDevice physicalDevice, 72 const vk::VkDeviceSize baseSize) 73{ 74 // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged. 75 vk::VkPhysicalDeviceProperties deviceProperties; 76 instance_interface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties); 77 vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment; 78 79 if (alignment == 0 || (baseSize % alignment == 0)) 80 return baseSize; 81 else 82 return (baseSize / alignment + 1)*alignment; 83} 84 85struct DispatchCommand 86{ 87 DispatchCommand (const deIntptr offset, 88 const tcu::UVec3& numWorkGroups) 89 : m_offset (offset) 90 , m_numWorkGroups (numWorkGroups) {} 91 92 deIntptr m_offset; 93 tcu::UVec3 m_numWorkGroups; 94}; 95 96typedef std::vector<DispatchCommand> DispatchCommandsVec; 97 98struct DispatchCaseDesc 99{ 100 DispatchCaseDesc (const char* name, 101 const char* description, 102 const deUintptr bufferSize, 103 const tcu::UVec3 workGroupSize, 104 const DispatchCommandsVec& dispatchCommands) 105 : m_name (name) 106 , m_description (description) 107 , m_bufferSize (bufferSize) 108 , m_workGroupSize (workGroupSize) 109 , m_dispatchCommands (dispatchCommands) {} 110 111 const char* m_name; 112 const char* m_description; 113 const deUintptr m_bufferSize; 114 const tcu::UVec3 m_workGroupSize; 115 const DispatchCommandsVec m_dispatchCommands; 116}; 117 118class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance 119{ 120public: 121 IndirectDispatchInstanceBufferUpload (Context& context, 122 const std::string& name, 123 const deUintptr bufferSize, 124 const tcu::UVec3& workGroupSize, 125 const DispatchCommandsVec& dispatchCommands); 126 127 virtual ~IndirectDispatchInstanceBufferUpload (void) {} 128 129 virtual tcu::TestStatus iterate (void); 130 131protected: 132 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, 133 const Buffer& indirectBuffer); 134 135 deBool verifyResultBuffer (const Buffer& resultBuffer, 136 const vk::VkDeviceSize resultBlockSize, 137 const vk::VkDeviceSize resultBufferSize) const; 138 139 Context& m_context; 140 const std::string m_name; 141 142 const vk::DeviceInterface& m_device_interface; 143 const vk::VkDevice m_device; 144 145 const vk::VkQueue m_queue; 146 const deUint32 m_queueFamilyIndex; 147 148 const deUintptr m_bufferSize; 149 const tcu::UVec3 m_workGroupSize; 150 const DispatchCommandsVec m_dispatchCommands; 151 152 vk::Allocator& m_allocator; 153 154private: 155 IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&); 156 IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&); 157}; 158 159IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context& context, 160 const std::string& name, 161 const deUintptr bufferSize, 162 const tcu::UVec3& workGroupSize, 163 const DispatchCommandsVec& dispatchCommands) 164 : vkt::TestInstance (context) 165 , m_context (context) 166 , m_name (name) 167 , m_device_interface (context.getDeviceInterface()) 168 , m_device (context.getDevice()) 169 , m_queue (context.getUniversalQueue()) 170 , m_queueFamilyIndex (context.getUniversalQueueFamilyIndex()) 171 , m_bufferSize (bufferSize) 172 , m_workGroupSize (workGroupSize) 173 , m_dispatchCommands (dispatchCommands) 174 , m_allocator (context.getDefaultAllocator()) 175{ 176} 177 178void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer) 179{ 180 DE_UNREF(commandBuffer); 181 182 const vk::Allocation& alloc = indirectBuffer.getAllocation(); 183 deUint8* indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr()); 184 185 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter) 186 { 187 DE_ASSERT(cmdIter->m_offset >= 0); 188 DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0); 189 DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize); 190 191 deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset]; 192 193 dstPtr[0] = cmdIter->m_numWorkGroups[0]; 194 dstPtr[1] = cmdIter->m_numWorkGroups[1]; 195 dstPtr[2] = cmdIter->m_numWorkGroups[2]; 196 } 197 198 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), m_bufferSize); 199} 200 201tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void) 202{ 203 tcu::TestContext& testCtx = m_context.getTestContext(); 204 205 testCtx.getLog() << tcu::TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << tcu::TestLog::EndMessage; 206 { 207 tcu::ScopedLogSection section(testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)"); 208 209 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx) 210 { 211 testCtx.getLog() 212 << tcu::TestLog::Message 213 << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups 214 << tcu::TestLog::EndMessage; 215 } 216 } 217 218 // Create result buffer 219 const vk::VkDeviceSize resultBlockSize = getResultBlockAlignedSize(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE); 220 const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size(); 221 222 Buffer resultBuffer( 223 m_device_interface, m_device, m_allocator, 224 makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), 225 vk::MemoryRequirement::HostVisible); 226 227 { 228 const vk::Allocation& alloc = resultBuffer.getAllocation(); 229 deUint8* resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr()); 230 231 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx) 232 { 233 deUint8* const dstPtr = &resultDataPtr[resultBlockSize*cmdNdx]; 234 235 *(deUint32*)(dstPtr + 0 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[0]; 236 *(deUint32*)(dstPtr + 1 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[1]; 237 *(deUint32*)(dstPtr + 2 * sizeof(deUint32)) = m_dispatchCommands[cmdNdx].m_numWorkGroups[2]; 238 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0; 239 } 240 241 vk::flushMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize); 242 } 243 244 // Create verify compute shader 245 const vk::Unique<vk::VkShaderModule> verifyShader(createShaderModule( 246 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"), 0u)); 247 248 // Create descriptorSetLayout 249 vk::DescriptorSetLayoutBuilder layoutBuilder; 250 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT); 251 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device)); 252 253 // Create compute pipeline 254 const vk::Unique<vk::VkPipelineLayout> pipelineLayout(makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout)); 255 const vk::Unique<vk::VkPipeline> computePipeline(makeComputePipeline(m_device_interface, m_device, *pipelineLayout, *verifyShader)); 256 257 // Create descriptor pool 258 const vk::Unique<vk::VkDescriptorPool> descriptorPool( 259 vk::DescriptorPoolBuilder() 260 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size()) 261 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size()))); 262 263 const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier( 264 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize); 265 266 // Create command buffer 267 const vk::Unique<vk::VkCommandPool> cmdPool(makeCommandPool(m_device_interface, m_device, m_queueFamilyIndex)); 268 const vk::Unique<vk::VkCommandBuffer> cmdBuffer(allocateCommandBuffer(m_device_interface, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY)); 269 270 // Begin recording commands 271 beginCommandBuffer(m_device_interface, *cmdBuffer); 272 273 // Create indirect buffer 274 Buffer indirectBuffer( 275 m_device_interface, m_device, m_allocator, 276 makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), 277 vk::MemoryRequirement::HostVisible); 278 fillIndirectBufferData(*cmdBuffer, indirectBuffer); 279 280 // Bind compute pipeline 281 m_device_interface.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline); 282 283 // Allocate descriptor sets 284 typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet; 285 std::vector<SharedVkDescriptorSet> descriptorSets(m_dispatchCommands.size()); 286 287 vk::VkDeviceSize curOffset = 0; 288 289 // Create descriptor sets 290 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx) 291 { 292 descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>( 293 makeDescriptorSet(m_device_interface, m_device, *descriptorPool, *descriptorSetLayout))); 294 295 const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize); 296 297 vk::DescriptorSetUpdateBuilder descriptorSetBuilder; 298 descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo); 299 descriptorSetBuilder.update(m_device_interface, m_device); 300 301 // Bind descriptor set 302 m_device_interface.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL); 303 304 // Dispatch indirect compute command 305 m_device_interface.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset); 306 307 curOffset += resultBlockSize; 308 } 309 310 // Insert memory barrier 311 m_device_interface.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0, 312 0, (const vk::VkMemoryBarrier*)DE_NULL, 313 1, &ssboPostBarrier, 314 0, (const vk::VkImageMemoryBarrier*)DE_NULL); 315 316 // End recording commands 317 endCommandBuffer(m_device_interface, *cmdBuffer); 318 319 // Wait for command buffer execution finish 320 submitCommandsAndWait(m_device_interface, m_device, m_queue, *cmdBuffer); 321 322 // Check if result buffer contains valid values 323 if (verifyResultBuffer(resultBuffer, resultBlockSize, resultBufferSize)) 324 return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass"); 325 else 326 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer"); 327} 328 329deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const Buffer& resultBuffer, 330 const vk::VkDeviceSize resultBlockSize, 331 const vk::VkDeviceSize resultBufferSize) const 332{ 333 deBool allOk = true; 334 const vk::Allocation& alloc = resultBuffer.getAllocation(); 335 vk::invalidateMappedMemoryRange(m_device_interface, m_device, alloc.getMemory(), alloc.getOffset(), resultBufferSize); 336 337 const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr()); 338 339 for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++) 340 { 341 const DispatchCommand& cmd = m_dispatchCommands[cmdNdx]; 342 const deUint8* const srcPtr = (const deUint8*)resultDataPtr + cmdNdx*resultBlockSize; 343 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET); 344 const deUint32 numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2]; 345 const deUint32 numGroups = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2]; 346 const deUint32 expectedCount = numInvocationsPerGroup * numGroups; 347 348 if (numPassed != expectedCount) 349 { 350 tcu::TestContext& testCtx = m_context.getTestContext(); 351 352 testCtx.getLog() 353 << tcu::TestLog::Message 354 << "ERROR: got invalid result for invocation " << cmdNdx 355 << ": got numPassed = " << numPassed << ", expected " << expectedCount 356 << tcu::TestLog::EndMessage; 357 358 allOk = false; 359 } 360 } 361 362 return allOk; 363} 364 365class IndirectDispatchCaseBufferUpload : public vkt::TestCase 366{ 367public: 368 IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx, 369 const DispatchCaseDesc& caseDesc, 370 const glu::GLSLVersion glslVersion); 371 372 virtual ~IndirectDispatchCaseBufferUpload (void) {} 373 374 virtual void initPrograms (vk::SourceCollections& programCollection) const; 375 virtual TestInstance* createInstance (Context& context) const; 376 377protected: 378 const deUintptr m_bufferSize; 379 const tcu::UVec3 m_workGroupSize; 380 const DispatchCommandsVec m_dispatchCommands; 381 const glu::GLSLVersion m_glslVersion; 382 383private: 384 IndirectDispatchCaseBufferUpload (const vkt::TestCase&); 385 IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&); 386}; 387 388IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext& testCtx, 389 const DispatchCaseDesc& caseDesc, 390 const glu::GLSLVersion glslVersion) 391 : vkt::TestCase (testCtx, caseDesc.m_name, caseDesc.m_description) 392 , m_bufferSize (caseDesc.m_bufferSize) 393 , m_workGroupSize (caseDesc.m_workGroupSize) 394 , m_dispatchCommands (caseDesc.m_dispatchCommands) 395 , m_glslVersion (glslVersion) 396{ 397} 398 399void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const 400{ 401 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion); 402 403 std::ostringstream verifyBuffer; 404 405 verifyBuffer 406 << versionDecl << "\n" 407 << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n" 408 << "layout(set = 0, binding = 0, std430) buffer Result\n" 409 << "{\n" 410 << " uvec3 expectedGroupCount;\n" 411 << " coherent uint numPassed;\n" 412 << "} result;\n" 413 << "void main (void)\n" 414 << "{\n" 415 << " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n" 416 << " atomicAdd(result.numPassed, 1u);\n" 417 << "}\n"; 418 419 std::map<std::string, std::string> args; 420 421 args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x()); 422 args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y()); 423 args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z()); 424 425 std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args); 426 427 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString); 428} 429 430TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const 431{ 432 return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands); 433} 434 435class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload 436{ 437public: 438 IndirectDispatchInstanceBufferGenerate (Context& context, 439 const std::string& name, 440 const deUintptr bufferSize, 441 const tcu::UVec3& workGroupSize, 442 const DispatchCommandsVec& dispatchCommands) 443 : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands) {} 444 445 virtual ~IndirectDispatchInstanceBufferGenerate (void) {} 446 447protected: 448 virtual void fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, 449 const Buffer& indirectBuffer); 450 451 vk::Move<vk::VkDescriptorPool> m_descriptorPool; 452 vk::Move<vk::VkDescriptorSet> m_descriptorSet; 453 vk::Move<vk::VkPipelineLayout> m_pipelineLayout; 454 vk::Move<vk::VkPipeline> m_computePipeline; 455 456private: 457 IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&); 458 IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&); 459}; 460 461void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const Buffer& indirectBuffer) 462{ 463 // Create compute shader that generates data for indirect buffer 464 const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader(createShaderModule( 465 m_device_interface, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u)); 466 467 // Create descriptorSetLayout 468 vk::DescriptorSetLayoutBuilder layoutBuilder; 469 layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT); 470 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(m_device_interface, m_device)); 471 472 // Create compute pipeline 473 m_pipelineLayout = makePipelineLayout(m_device_interface, m_device, *descriptorSetLayout); 474 m_computePipeline = makeComputePipeline(m_device_interface, m_device, *m_pipelineLayout, *genIndirectBufferDataShader); 475 476 // Create descriptor pool 477 m_descriptorPool = vk::DescriptorPoolBuilder() 478 .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) 479 .build(m_device_interface, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); 480 481 // Create descriptor set 482 m_descriptorSet = makeDescriptorSet(m_device_interface, m_device, *m_descriptorPool, *descriptorSetLayout); 483 484 const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize); 485 486 vk::DescriptorSetUpdateBuilder descriptorSetBuilder; 487 descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo); 488 descriptorSetBuilder.update(m_device_interface, m_device); 489 490 const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier( 491 vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize); 492 493 // Bind compute pipeline 494 m_device_interface.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline); 495 496 // Bind descriptor set 497 m_device_interface.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL); 498 499 // Dispatch compute command 500 m_device_interface.cmdDispatch(commandBuffer, 1u, 1u, 1u); 501 502 // Insert memory barrier 503 m_device_interface.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0, 504 0, (const vk::VkMemoryBarrier*)DE_NULL, 505 1, &bufferBarrier, 506 0, (const vk::VkImageMemoryBarrier*)DE_NULL); 507} 508 509class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload 510{ 511public: 512 IndirectDispatchCaseBufferGenerate (tcu::TestContext& testCtx, 513 const DispatchCaseDesc& caseDesc, 514 const glu::GLSLVersion glslVersion) 515 : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion) {} 516 517 virtual ~IndirectDispatchCaseBufferGenerate (void) {} 518 519 virtual void initPrograms (vk::SourceCollections& programCollection) const; 520 virtual TestInstance* createInstance (Context& context) const; 521 522private: 523 IndirectDispatchCaseBufferGenerate (const vkt::TestCase&); 524 IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&); 525}; 526 527void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const 528{ 529 IndirectDispatchCaseBufferUpload::initPrograms(programCollection); 530 531 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion); 532 533 std::ostringstream computeBuffer; 534 535 // Header 536 computeBuffer 537 << versionDecl << "\n" 538 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n" 539 << "layout(set = 0, binding = 0, std430) buffer Out\n" 540 << "{\n" 541 << " highp uint data[];\n" 542 << "};\n" 543 << "void writeCmd (uint offset, uvec3 numWorkGroups)\n" 544 << "{\n" 545 << " data[offset+0u] = numWorkGroups.x;\n" 546 << " data[offset+1u] = numWorkGroups.y;\n" 547 << " data[offset+2u] = numWorkGroups.z;\n" 548 << "}\n" 549 << "void main (void)\n" 550 << "{\n"; 551 552 // Dispatch commands 553 for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter) 554 { 555 const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32)); 556 DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset); 557 558 computeBuffer 559 << "\twriteCmd(" << offs << "u, uvec3(" 560 << cmdIter->m_numWorkGroups.x() << "u, " 561 << cmdIter->m_numWorkGroups.y() << "u, " 562 << cmdIter->m_numWorkGroups.z() << "u));\n"; 563 } 564 565 // Ending 566 computeBuffer << "}\n"; 567 568 std::string computeString = computeBuffer.str(); 569 570 programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString); 571} 572 573TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const 574{ 575 return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands); 576} 577 578DispatchCommandsVec commandsVec (const DispatchCommand& cmd) 579{ 580 DispatchCommandsVec vec; 581 vec.push_back(cmd); 582 return vec; 583} 584 585DispatchCommandsVec commandsVec (const DispatchCommand& cmd0, 586 const DispatchCommand& cmd1, 587 const DispatchCommand& cmd2, 588 const DispatchCommand& cmd3, 589 const DispatchCommand& cmd4) 590{ 591 DispatchCommandsVec vec; 592 vec.push_back(cmd0); 593 vec.push_back(cmd1); 594 vec.push_back(cmd2); 595 vec.push_back(cmd3); 596 vec.push_back(cmd4); 597 return vec; 598} 599 600DispatchCommandsVec commandsVec (const DispatchCommand& cmd0, 601 const DispatchCommand& cmd1, 602 const DispatchCommand& cmd2, 603 const DispatchCommand& cmd3, 604 const DispatchCommand& cmd4, 605 const DispatchCommand& cmd5, 606 const DispatchCommand& cmd6) 607{ 608 DispatchCommandsVec vec; 609 vec.push_back(cmd0); 610 vec.push_back(cmd1); 611 vec.push_back(cmd2); 612 vec.push_back(cmd3); 613 vec.push_back(cmd4); 614 vec.push_back(cmd5); 615 vec.push_back(cmd6); 616 return vec; 617} 618 619} // anonymous ns 620 621tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx) 622{ 623 static const DispatchCaseDesc s_dispatchCases[] = 624 { 625 DispatchCaseDesc("single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1), 626 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))) 627 ), 628 DispatchCaseDesc("multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1), 629 commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))) 630 ), 631 DispatchCaseDesc("multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1), 632 commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))) 633 ), 634 DispatchCaseDesc("small_offset", "Small offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1), 635 commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))) 636 ), 637 DispatchCaseDesc("large_offset", "Large offset", (2 << 20), tcu::UVec3(1, 1, 1), 638 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))) 639 ), 640 DispatchCaseDesc("large_offset_multiple_invocations", "Large offset, multiple invocations", (2 << 20), tcu::UVec3(2, 3, 1), 641 commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))) 642 ), 643 DispatchCaseDesc("empty_command", "Empty command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1), 644 commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))) 645 ), 646 DispatchCaseDesc("multi_dispatch", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2), 647 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)), 648 DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)), 649 DispatchCommand(104, tcu::UVec3(1, 3, 1)), 650 DispatchCommand(40, tcu::UVec3(1, 1, 7)), 651 DispatchCommand(52, tcu::UVec3(1, 1, 4))) 652 ), 653 DispatchCaseDesc("multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", 1 << 10, tcu::UVec3(3, 1, 2), 654 commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)), 655 DispatchCommand(0, tcu::UVec3(1, 1, 1)), 656 DispatchCommand(0, tcu::UVec3(1, 1, 1)), 657 DispatchCommand(104, tcu::UVec3(1, 3, 1)), 658 DispatchCommand(104, tcu::UVec3(1, 3, 1)), 659 DispatchCommand(52, tcu::UVec3(1, 1, 4)), 660 DispatchCommand(52, tcu::UVec3(1, 1, 4))) 661 ), 662 }; 663 664 de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests(new tcu::TestCaseGroup(testCtx, "indirect_dispatch", "Indirect dispatch tests")); 665 666 tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer", ""); 667 indirectComputeDispatchTests->addChild(groupBufferUpload); 668 669 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++) 670 { 671 groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES)); 672 } 673 674 tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute", ""); 675 indirectComputeDispatchTests->addChild(groupBufferGenerate); 676 677 for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++) 678 { 679 groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, s_dispatchCases[ndx], glu::GLSL_VERSION_310_ES)); 680 } 681 682 return indirectComputeDispatchTests.release(); 683} 684 685} // compute 686} // vkt 687