1/*------------------------------------------------------------------------- 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2015 Google Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Test Case Skeleton Based on Compute Shaders 22 *//*--------------------------------------------------------------------*/ 23 24#include "vktSpvAsmComputeShaderCase.hpp" 25 26#include "deSharedPtr.hpp" 27#include "deSTLUtil.hpp" 28 29#include "vkBuilderUtil.hpp" 30#include "vkMemUtil.hpp" 31#include "vkPlatform.hpp" 32#include "vkRefUtil.hpp" 33#include "vkQueryUtil.hpp" 34#include "vkTypeUtil.hpp" 35 36namespace 37{ 38 39using namespace vk; 40using std::vector; 41 42typedef vkt::SpirVAssembly::AllocationMp AllocationMp; 43typedef vkt::SpirVAssembly::AllocationSp AllocationSp; 44 45typedef Unique<VkBuffer> BufferHandleUp; 46typedef de::SharedPtr<BufferHandleUp> BufferHandleSp; 47 48/*--------------------------------------------------------------------*//*! 49 * \brief Create storage buffer, allocate and bind memory for the buffer 50 * 51 * The memory is created as host visible and passed back as a vk::Allocation 52 * instance via outMemory. 53 *//*--------------------------------------------------------------------*/ 54Move<VkBuffer> createBufferAndBindMemory (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorType dtype, Allocator& allocator, size_t numBytes, AllocationMp* outMemory) 55{ 56 VkBufferUsageFlags usageBit = (VkBufferUsageFlags)0; 57 58 switch (dtype) 59 { 60 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: usageBit = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break; 61 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: usageBit = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; break; 62 default: DE_ASSERT(false); 63 } 64 65 const VkBufferCreateInfo bufferCreateInfo = 66 { 67 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType 68 DE_NULL, // pNext 69 0u, // flags 70 numBytes, // size 71 usageBit, // usage 72 VK_SHARING_MODE_EXCLUSIVE, // sharingMode 73 0u, // queueFamilyCount 74 DE_NULL, // pQueueFamilyIndices 75 }; 76 77 Move<VkBuffer> buffer (createBuffer(vkdi, device, &bufferCreateInfo)); 78 const VkMemoryRequirements requirements = getBufferMemoryRequirements(vkdi, device, *buffer); 79 AllocationMp bufferMemory = allocator.allocate(requirements, MemoryRequirement::HostVisible); 80 81 VK_CHECK(vkdi.bindBufferMemory(device, *buffer, bufferMemory->getMemory(), bufferMemory->getOffset())); 82 *outMemory = bufferMemory; 83 84 return buffer; 85} 86 87void setMemory (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, const void* data) 88{ 89 void* const hostPtr = destAlloc->getHostPtr(); 90 91 deMemcpy((deUint8*)hostPtr, data, numBytes); 92 flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes); 93} 94 95void fillMemoryWithValue (const DeviceInterface& vkdi, const VkDevice& device, Allocation* destAlloc, size_t numBytes, deUint8 value) 96{ 97 void* const hostPtr = destAlloc->getHostPtr(); 98 99 deMemset((deUint8*)hostPtr, value, numBytes); 100 flushMappedMemoryRange(vkdi, device, destAlloc->getMemory(), destAlloc->getOffset(), numBytes); 101} 102 103/*--------------------------------------------------------------------*//*! 104 * \brief Create a descriptor set layout with the given descriptor types 105 * 106 * All descriptors are created for compute pipeline. 107 *//*--------------------------------------------------------------------*/ 108Move<VkDescriptorSetLayout> createDescriptorSetLayout (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes) 109{ 110 DescriptorSetLayoutBuilder builder; 111 112 for (size_t bindingNdx = 0; bindingNdx < dtypes.size(); ++bindingNdx) 113 builder.addSingleBinding(dtypes[bindingNdx], VK_SHADER_STAGE_COMPUTE_BIT); 114 115 return builder.build(vkdi, device); 116} 117 118/*--------------------------------------------------------------------*//*! 119 * \brief Create a pipeline layout with one descriptor set 120 *//*--------------------------------------------------------------------*/ 121Move<VkPipelineLayout> createPipelineLayout (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorSetLayout descriptorSetLayout, const vkt::SpirVAssembly::BufferSp& pushConstants) 122{ 123 VkPipelineLayoutCreateInfo createInfo = 124 { 125 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType 126 DE_NULL, // pNext 127 (VkPipelineLayoutCreateFlags)0, 128 1u, // descriptorSetCount 129 &descriptorSetLayout, // pSetLayouts 130 0u, // pushConstantRangeCount 131 DE_NULL, // pPushConstantRanges 132 }; 133 134 VkPushConstantRange range = 135 { 136 VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags 137 0, // offset 138 0, // size 139 }; 140 141 if (pushConstants != DE_NULL) 142 { 143 range.size = static_cast<deUint32>(pushConstants->getNumBytes()); 144 createInfo.pushConstantRangeCount = 1; 145 createInfo.pPushConstantRanges = ⦥ 146 } 147 148 return createPipelineLayout(vkdi, device, &createInfo); 149} 150 151/*--------------------------------------------------------------------*//*! 152 * \brief Create a one-time descriptor pool for one descriptor set that 153 * support the given descriptor types. 154 *//*--------------------------------------------------------------------*/ 155inline Move<VkDescriptorPool> createDescriptorPool (const DeviceInterface& vkdi, const VkDevice& device, const vector<VkDescriptorType>& dtypes) 156{ 157 DescriptorPoolBuilder builder; 158 159 for (size_t typeNdx = 0; typeNdx < dtypes.size(); ++typeNdx) 160 builder.addType(dtypes[typeNdx], 1); 161 162 return builder.build(vkdi, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, /* maxSets = */ 1); 163} 164 165/*--------------------------------------------------------------------*//*! 166 * \brief Create a descriptor set 167 * 168 * The descriptor set's layout contains the given descriptor types, 169 * sequentially binded to binding points starting from 0. 170 *//*--------------------------------------------------------------------*/ 171Move<VkDescriptorSet> createDescriptorSet (const DeviceInterface& vkdi, const VkDevice& device, VkDescriptorPool pool, VkDescriptorSetLayout layout, const vector<VkDescriptorType>& dtypes, const vector<VkDescriptorBufferInfo>& descriptorInfos) 172{ 173 DE_ASSERT(dtypes.size() == descriptorInfos.size()); 174 175 const VkDescriptorSetAllocateInfo allocInfo = 176 { 177 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 178 DE_NULL, 179 pool, 180 1u, 181 &layout 182 }; 183 184 Move<VkDescriptorSet> descriptorSet = allocateDescriptorSet(vkdi, device, &allocInfo); 185 DescriptorSetUpdateBuilder builder; 186 187 for (deUint32 descriptorNdx = 0; descriptorNdx < dtypes.size(); ++descriptorNdx) 188 builder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(descriptorNdx), dtypes[descriptorNdx], &descriptorInfos[descriptorNdx]); 189 builder.update(vkdi, device); 190 191 return descriptorSet; 192} 193 194/*--------------------------------------------------------------------*//*! 195 * \brief Create a compute pipeline based on the given shader 196 *//*--------------------------------------------------------------------*/ 197Move<VkPipeline> createComputePipeline (const DeviceInterface& vkdi, const VkDevice& device, VkPipelineLayout pipelineLayout, VkShaderModule shader, const char* entryPoint, const vector<deUint32>& specConstants) 198{ 199 const deUint32 numSpecConstants = (deUint32)specConstants.size(); 200 vector<VkSpecializationMapEntry> entries; 201 VkSpecializationInfo specInfo; 202 203 if (numSpecConstants != 0) 204 { 205 entries.resize(numSpecConstants); 206 207 for (deUint32 ndx = 0; ndx < numSpecConstants; ++ndx) 208 { 209 entries[ndx].constantID = ndx; 210 entries[ndx].offset = ndx * (deUint32)sizeof(deUint32); 211 entries[ndx].size = sizeof(deUint32); 212 } 213 214 specInfo.mapEntryCount = numSpecConstants; 215 specInfo.pMapEntries = &entries[0]; 216 specInfo.dataSize = numSpecConstants * sizeof(deUint32); 217 specInfo.pData = specConstants.data(); 218 } 219 220 const VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo = 221 { 222 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType 223 DE_NULL, // pNext 224 (VkPipelineShaderStageCreateFlags)0, // flags 225 VK_SHADER_STAGE_COMPUTE_BIT, // stage 226 shader, // module 227 entryPoint, // pName 228 (numSpecConstants == 0) ? DE_NULL : &specInfo, // pSpecializationInfo 229 }; 230 const VkComputePipelineCreateInfo pipelineCreateInfo = 231 { 232 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType 233 DE_NULL, // pNext 234 (VkPipelineCreateFlags)0, 235 pipelineShaderStageCreateInfo, // cs 236 pipelineLayout, // layout 237 (VkPipeline)0, // basePipelineHandle 238 0u, // basePipelineIndex 239 }; 240 241 return createComputePipeline(vkdi, device, (VkPipelineCache)0u, &pipelineCreateInfo); 242} 243 244/*--------------------------------------------------------------------*//*! 245 * \brief Create a command pool 246 * 247 * The created command pool is designated for use on the queue type 248 * represented by the given queueFamilyIndex. 249 *//*--------------------------------------------------------------------*/ 250Move<VkCommandPool> createCommandPool (const DeviceInterface& vkdi, VkDevice device, deUint32 queueFamilyIndex) 251{ 252 return createCommandPool(vkdi, device, 0u, queueFamilyIndex); 253} 254 255} // anonymous 256 257namespace vkt 258{ 259namespace SpirVAssembly 260{ 261 262/*--------------------------------------------------------------------*//*! 263 * \brief Test instance for compute pipeline 264 * 265 * The compute shader is specified in the format of SPIR-V assembly, which 266 * is allowed to access MAX_NUM_INPUT_BUFFERS input storage buffers and 267 * MAX_NUM_OUTPUT_BUFFERS output storage buffers maximally. The shader 268 * source and input/output data are given in a ComputeShaderSpec object. 269 * 270 * This instance runs the given compute shader by feeding the data from input 271 * buffers and compares the data in the output buffers with the expected. 272 *//*--------------------------------------------------------------------*/ 273class SpvAsmComputeShaderInstance : public TestInstance 274{ 275public: 276 SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec, const ComputeTestFeatures features); 277 tcu::TestStatus iterate (void); 278 279private: 280 const ComputeShaderSpec& m_shaderSpec; 281 const ComputeTestFeatures m_features; 282}; 283 284// ComputeShaderTestCase implementations 285 286SpvAsmComputeShaderCase::SpvAsmComputeShaderCase (tcu::TestContext& testCtx, const char* name, const char* description, const ComputeShaderSpec& spec, const ComputeTestFeatures features) 287 : TestCase (testCtx, name, description) 288 , m_shaderSpec (spec) 289 , m_features (features) 290{ 291} 292 293void SpvAsmComputeShaderCase::initPrograms (SourceCollections& programCollection) const 294{ 295 programCollection.spirvAsmSources.add("compute") << m_shaderSpec.assembly.c_str(); 296} 297 298TestInstance* SpvAsmComputeShaderCase::createInstance (Context& ctx) const 299{ 300 return new SpvAsmComputeShaderInstance(ctx, m_shaderSpec, m_features); 301} 302 303// ComputeShaderTestInstance implementations 304 305SpvAsmComputeShaderInstance::SpvAsmComputeShaderInstance (Context& ctx, const ComputeShaderSpec& spec, const ComputeTestFeatures features) 306 : TestInstance (ctx) 307 , m_shaderSpec (spec) 308 , m_features (features) 309{ 310} 311 312tcu::TestStatus SpvAsmComputeShaderInstance::iterate (void) 313{ 314 const VkPhysicalDeviceFeatures& features = m_context.getDeviceFeatures(); 315 316 if ((m_features == COMPUTE_TEST_USES_INT16 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt16) 317 { 318 TCU_THROW(NotSupportedError, "shaderInt16 feature is not supported"); 319 } 320 321 if ((m_features == COMPUTE_TEST_USES_INT64 || m_features == COMPUTE_TEST_USES_INT16_INT64) && !features.shaderInt64) 322 { 323 TCU_THROW(NotSupportedError, "shaderInt64 feature is not supported"); 324 } 325 326 { 327 const InstanceInterface& vki = m_context.getInstanceInterface(); 328 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice(); 329 330 // 16bit storage features 331 { 332 if (!is16BitStorageFeaturesSupported(vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.ext16BitStorage)) 333 TCU_THROW(NotSupportedError, "Requested 16bit storage features not supported"); 334 } 335 336 // VariablePointers features 337 { 338 if (!isVariablePointersFeaturesSupported(vki, physicalDevice, m_context.getInstanceExtensions(), m_shaderSpec.requestedVulkanFeatures.extVariablePointers)) 339 TCU_THROW(NotSupportedError, "Request Variable Pointer feature not supported"); 340 } 341 } 342 343 // defer device and resource creation until after feature checks 344 const Unique<VkDevice> vkDevice (createDeviceWithExtensions(m_context, m_context.getUniversalQueueFamilyIndex(), m_context.getDeviceExtensions(), m_shaderSpec.extensions)); 345 const VkDevice& device = *vkDevice; 346 const DeviceDriver vkDeviceInterface (m_context.getInstanceInterface(), device); 347 const DeviceInterface& vkdi = vkDeviceInterface; 348 const de::UniquePtr<vk::Allocator> vkAllocator (createAllocator(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), vkDeviceInterface, device)); 349 Allocator& allocator = *vkAllocator; 350 const VkQueue queue (getDeviceQueue(vkDeviceInterface, device, m_context.getUniversalQueueFamilyIndex(), 0)); 351 352 vector<AllocationSp> inputAllocs; 353 vector<AllocationSp> outputAllocs; 354 vector<BufferHandleSp> inputBuffers; 355 vector<BufferHandleSp> outputBuffers; 356 vector<VkDescriptorBufferInfo> descriptorInfos; 357 vector<VkDescriptorType> descriptorTypes; 358 359 DE_ASSERT(!m_shaderSpec.outputs.empty()); 360 361 // Create buffer object, allocate storage, and create view for all input/output buffers. 362 363 for (deUint32 inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx) 364 { 365 if (m_shaderSpec.inputTypes.count(inputNdx) != 0) 366 descriptorTypes.push_back(m_shaderSpec.inputTypes.at(inputNdx)); 367 else 368 descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); 369 370 AllocationMp alloc; 371 const BufferSp& input = m_shaderSpec.inputs[inputNdx]; 372 const size_t numBytes = input->getNumBytes(); 373 BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc)); 374 375 setMemory(vkdi, device, &*alloc, numBytes, input->data()); 376 descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes)); 377 inputBuffers.push_back(BufferHandleSp(buffer)); 378 inputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release())); 379 } 380 381 for (deUint32 outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx) 382 { 383 descriptorTypes.push_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); 384 385 AllocationMp alloc; 386 const BufferSp& output = m_shaderSpec.outputs[outputNdx]; 387 const size_t numBytes = output->getNumBytes(); 388 BufferHandleUp* buffer = new BufferHandleUp(createBufferAndBindMemory(vkdi, device, descriptorTypes.back(), allocator, numBytes, &alloc)); 389 390 fillMemoryWithValue(vkdi, device, &*alloc, numBytes, 0xff); 391 descriptorInfos.push_back(vk::makeDescriptorBufferInfo(**buffer, 0u, numBytes)); 392 outputBuffers.push_back(BufferHandleSp(buffer)); 393 outputAllocs.push_back(de::SharedPtr<Allocation>(alloc.release())); 394 } 395 396 // Create layouts and descriptor set. 397 398 Unique<VkDescriptorSetLayout> descriptorSetLayout (createDescriptorSetLayout(vkdi, device, descriptorTypes)); 399 Unique<VkPipelineLayout> pipelineLayout (createPipelineLayout(vkdi, device, *descriptorSetLayout, m_shaderSpec.pushConstants)); 400 Unique<VkDescriptorPool> descriptorPool (createDescriptorPool(vkdi, device, descriptorTypes)); 401 Unique<VkDescriptorSet> descriptorSet (createDescriptorSet(vkdi, device, *descriptorPool, *descriptorSetLayout, descriptorTypes, descriptorInfos)); 402 403 // Create compute shader and pipeline. 404 405 const ProgramBinary& binary = m_context.getBinaryCollection().get("compute"); 406 Unique<VkShaderModule> module (createShaderModule(vkdi, device, binary, (VkShaderModuleCreateFlags)0u)); 407 408 Unique<VkPipeline> computePipeline (createComputePipeline(vkdi, device, *pipelineLayout, *module, m_shaderSpec.entryPoint.c_str(), m_shaderSpec.specConstants)); 409 410 // Create command buffer and record commands 411 412 const Unique<VkCommandPool> cmdPool (createCommandPool(vkdi, device, m_context.getUniversalQueueFamilyIndex())); 413 Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkdi, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY)); 414 415 const VkCommandBufferBeginInfo cmdBufferBeginInfo = 416 { 417 VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // sType 418 DE_NULL, // pNext 419 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, 420 (const VkCommandBufferInheritanceInfo*)DE_NULL, 421 }; 422 423 const tcu::IVec3& numWorkGroups = m_shaderSpec.numWorkGroups; 424 425 VK_CHECK(vkdi.beginCommandBuffer(*cmdBuffer, &cmdBufferBeginInfo)); 426 vkdi.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline); 427 vkdi.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL); 428 if (m_shaderSpec.pushConstants != DE_NULL) 429 { 430 const deUint32 size = static_cast<deUint32>(m_shaderSpec.pushConstants->getNumBytes()); 431 const void* data = m_shaderSpec.pushConstants->data(); 432 433 vkdi.cmdPushConstants(*cmdBuffer, *pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, /* offset = */ 0, /* size = */ size, data); 434 } 435 vkdi.cmdDispatch(*cmdBuffer, numWorkGroups.x(), numWorkGroups.y(), numWorkGroups.z()); 436 VK_CHECK(vkdi.endCommandBuffer(*cmdBuffer)); 437 438 // Create fence and run. 439 440 const Unique<VkFence> cmdCompleteFence (createFence(vkdi, device)); 441 const deUint64 infiniteTimeout = ~(deUint64)0u; 442 const VkSubmitInfo submitInfo = 443 { 444 VK_STRUCTURE_TYPE_SUBMIT_INFO, 445 DE_NULL, 446 0u, 447 (const VkSemaphore*)DE_NULL, 448 (const VkPipelineStageFlags*)DE_NULL, 449 1u, 450 &cmdBuffer.get(), 451 0u, 452 (const VkSemaphore*)DE_NULL, 453 }; 454 455 VK_CHECK(vkdi.queueSubmit(queue, 1, &submitInfo, *cmdCompleteFence)); 456 VK_CHECK(vkdi.waitForFences(device, 1, &cmdCompleteFence.get(), 0u, infiniteTimeout)); // \note: timeout is failure 457 458 // Check output. 459 if (m_shaderSpec.verifyIO) 460 { 461 if (!(*m_shaderSpec.verifyIO)(m_shaderSpec.inputs, outputAllocs, m_shaderSpec.outputs, m_context.getTestContext().getLog())) 462 return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage); 463 } 464 else 465 { 466 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx) 467 { 468 const BufferSp& expectedOutput = m_shaderSpec.outputs[outputNdx]; 469 if (deMemCmp(expectedOutput->data(), outputAllocs[outputNdx]->getHostPtr(), expectedOutput->getNumBytes())) 470 return tcu::TestStatus(m_shaderSpec.failResult, m_shaderSpec.failMessage); 471 } 472 } 473 474 return tcu::TestStatus::pass("Output match with expected"); 475} 476 477} // SpirVAssembly 478} // vkt 479