1/*------------------------------------------------------------------------ 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2016 The Khronos Group Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file vktSparseResourcesBufferSparseResidency.cpp 21 * \brief Sparse partially resident buffers tests 22 *//*--------------------------------------------------------------------*/ 23 24#include "vktSparseResourcesBufferSparseResidency.hpp" 25#include "vktSparseResourcesTestsUtil.hpp" 26#include "vktSparseResourcesBase.hpp" 27#include "vktTestCaseUtil.hpp" 28 29#include "vkDefs.hpp" 30#include "vkRef.hpp" 31#include "vkRefUtil.hpp" 32#include "vkPlatform.hpp" 33#include "vkPrograms.hpp" 34#include "vkRefUtil.hpp" 35#include "vkMemUtil.hpp" 36#include "vkQueryUtil.hpp" 37#include "vkBuilderUtil.hpp" 38#include "vkTypeUtil.hpp" 39 40#include "deStringUtil.hpp" 41#include "deUniquePtr.hpp" 42 43#include <string> 44#include <vector> 45 46using namespace vk; 47 48namespace vkt 49{ 50namespace sparse 51{ 52namespace 53{ 54 55enum ShaderParameters 56{ 57 SIZE_OF_UINT_IN_SHADER = 4u, 58}; 59 60class BufferSparseResidencyCase : public TestCase 61{ 62public: 63 BufferSparseResidencyCase (tcu::TestContext& testCtx, 64 const std::string& name, 65 const std::string& description, 66 const deUint32 bufferSize, 67 const glu::GLSLVersion glslVersion, 68 const bool useDeviceGroups); 69 70 71 void initPrograms (SourceCollections& sourceCollections) const; 72 TestInstance* createInstance (Context& context) const; 73 74private: 75 const deUint32 m_bufferSize; 76 const glu::GLSLVersion m_glslVersion; 77 const bool m_useDeviceGroups; 78 79}; 80 81BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext& testCtx, 82 const std::string& name, 83 const std::string& description, 84 const deUint32 bufferSize, 85 const glu::GLSLVersion glslVersion, 86 const bool useDeviceGroups) 87 88 : TestCase (testCtx, name, description) 89 , m_bufferSize (bufferSize) 90 , m_glslVersion (glslVersion) 91 , m_useDeviceGroups (useDeviceGroups) 92{ 93} 94 95void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const 96{ 97 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion); 98 const deUint32 iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER; 99 100 std::ostringstream src; 101 102 src << versionDecl << "\n" 103 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n" 104 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n" 105 << "{\n" 106 << " uint data[];\n" 107 << "} sb_in;\n" 108 << "\n" 109 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n" 110 << "{\n" 111 << " uint result[];\n" 112 << "} sb_out;\n" 113 << "\n" 114 << "void main (void)\n" 115 << "{\n" 116 << " for(int i=0; i<" << iterationsCount << "; ++i) \n" 117 << " {\n" 118 << " sb_out.result[i] = sb_in.data[i];" 119 << " }\n" 120 << "}\n"; 121 122 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str()); 123} 124 125class BufferSparseResidencyInstance : public SparseResourcesBaseInstance 126{ 127public: 128 BufferSparseResidencyInstance (Context& context, 129 const deUint32 bufferSize, 130 const bool useDeviceGroups); 131 132 tcu::TestStatus iterate (void); 133 134private: 135 const deUint32 m_bufferSize; 136 const deUint32 m_useDeviceGroups; 137}; 138 139BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context& context, 140 const deUint32 bufferSize, 141 const bool useDeviceGroups) 142 : SparseResourcesBaseInstance (context) 143 , m_bufferSize (bufferSize) 144 , m_useDeviceGroups (useDeviceGroups) 145{ 146} 147 148tcu::TestStatus BufferSparseResidencyInstance::iterate (void) 149{ 150 const InstanceInterface& instance = m_context.getInstanceInterface(); 151 { 152 // Create logical device supporting both sparse and compute operations 153 QueueRequirementsVec queueRequirements; 154 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u)); 155 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u)); 156 157 createDeviceSupportingQueues(queueRequirements); 158 } 159 const VkPhysicalDevice physicalDevice = getPhysicalDevice(); 160 const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice); 161 162 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer) 163 TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported"); 164 165 const DeviceInterface& deviceInterface = getDeviceInterface(); 166 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0); 167 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0); 168 169 // Go through all physical devices 170 for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++) 171 { 172 const deUint32 firstDeviceID = physDevID; 173 const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices; 174 175 VkBufferCreateInfo bufferCreateInfo = 176 { 177 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType; 178 DE_NULL, // const void* pNext; 179 VK_BUFFER_CREATE_SPARSE_BINDING_BIT | 180 VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags; 181 m_bufferSize, // VkDeviceSize size; 182 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 183 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage; 184 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; 185 0u, // deUint32 queueFamilyIndexCount; 186 DE_NULL // const deUint32* pQueueFamilyIndices; 187 }; 188 189 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex }; 190 191 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex) 192 { 193 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; 194 bufferCreateInfo.queueFamilyIndexCount = 2u; 195 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices; 196 } 197 198 // Create sparse buffer 199 const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo)); 200 201 // Create sparse buffer memory bind semaphore 202 const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice())); 203 204 const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer); 205 206 if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize) 207 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits"); 208 209 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0); 210 211 const deUint32 numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment); 212 std::vector<DeviceMemorySp> deviceMemUniquePtrVec; 213 214 { 215 std::vector<VkSparseMemoryBind> sparseMemoryBinds; 216 const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any); 217 218 if (memoryType == NO_MATCH_FOUND) 219 return tcu::TestStatus::fail("No matching memory type found"); 220 221 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2) 222 { 223 const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx); 224 225 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL)))); 226 227 sparseMemoryBinds.push_back(sparseMemoryBind); 228 } 229 230 const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]); 231 232 const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = 233 { 234 VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR, //VkStructureType sType; 235 DE_NULL, //const void* pNext; 236 firstDeviceID, //deUint32 resourceDeviceIndex; 237 secondDeviceID, //deUint32 memoryDeviceIndex; 238 }; 239 const VkBindSparseInfo bindSparseInfo = 240 { 241 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType; 242 m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext; 243 0u, //deUint32 waitSemaphoreCount; 244 DE_NULL, //const VkSemaphore* pWaitSemaphores; 245 1u, //deUint32 bufferBindCount; 246 &sparseBufferBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds; 247 0u, //deUint32 imageOpaqueBindCount; 248 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds; 249 0u, //deUint32 imageBindCount; 250 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds; 251 1u, //deUint32 signalSemaphoreCount; 252 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores; 253 }; 254 255 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL)); 256 } 257 258 // Create input buffer 259 const VkBufferCreateInfo inputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); 260 const Unique<VkBuffer> inputBuffer (createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo)); 261 const de::UniquePtr<Allocation> inputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible)); 262 263 264 std::vector<deUint8> referenceData; 265 referenceData.resize(m_bufferSize); 266 267 for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx) 268 { 269 referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u); 270 } 271 272 deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize); 273 274 flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize); 275 276 // Create output buffer 277 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT); 278 const Unique<VkBuffer> outputBuffer (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo)); 279 const de::UniquePtr<Allocation> outputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible)); 280 281 // Create command buffer for compute and data transfer oparations 282 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex)); 283 const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY)); 284 285 // Start recording compute and transfer commands 286 beginCommandBuffer(deviceInterface, *commandBuffer); 287 288 // Create descriptor set 289 const Unique<VkDescriptorSetLayout> descriptorSetLayout( 290 DescriptorSetLayoutBuilder() 291 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 292 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 293 .build(deviceInterface, getDevice())); 294 295 // Create compute pipeline 296 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL)); 297 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout)); 298 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule)); 299 300 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline); 301 302 const Unique<VkDescriptorPool> descriptorPool( 303 DescriptorPoolBuilder() 304 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u) 305 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u)); 306 307 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout)); 308 309 { 310 const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize); 311 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize); 312 313 DescriptorSetUpdateBuilder() 314 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo) 315 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo) 316 .update(deviceInterface, getDevice()); 317 } 318 319 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL); 320 321 { 322 const VkBufferMemoryBarrier inputBufferBarrier 323 = makeBufferMemoryBarrier( VK_ACCESS_HOST_WRITE_BIT, 324 VK_ACCESS_SHADER_READ_BIT, 325 *inputBuffer, 326 0ull, 327 m_bufferSize); 328 329 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL); 330 } 331 332 deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u); 333 334 { 335 const VkBufferMemoryBarrier sparseBufferBarrier 336 = makeBufferMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT, 337 VK_ACCESS_TRANSFER_READ_BIT, 338 *sparseBuffer, 339 0ull, 340 m_bufferSize); 341 342 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL); 343 } 344 345 { 346 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize); 347 348 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy); 349 } 350 351 { 352 const VkBufferMemoryBarrier outputBufferBarrier 353 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT, 354 VK_ACCESS_HOST_READ_BIT, 355 *outputBuffer, 356 0ull, 357 m_bufferSize); 358 359 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL); 360 } 361 362 // End recording compute and transfer commands 363 endCommandBuffer(deviceInterface, *commandBuffer); 364 365 const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT }; 366 367 // Submit transfer commands for execution and wait for completion 368 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), 369 waitStageBits, 0, DE_NULL, m_useDeviceGroups, firstDeviceID); 370 371 // Retrieve data from output buffer to host memory 372 invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize); 373 374 const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr()); 375 376 // Wait for sparse queue to become idle 377 deviceInterface.queueWaitIdle(sparseQueue.queueHandle); 378 379 // Compare output data with reference data 380 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx) 381 { 382 const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment); 383 const deUint32 offset = alignment * sparseBindNdx; 384 const deUint32 size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment; 385 386 if (sparseBindNdx % 2u == 0u) 387 { 388 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0) 389 return tcu::TestStatus::fail("Failed"); 390 } 391 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict) 392 { 393 deMemset(&referenceData[offset], 0u, size); 394 395 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0) 396 return tcu::TestStatus::fail("Failed"); 397 } 398 } 399 } 400 401 return tcu::TestStatus::pass("Passed"); 402} 403 404TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const 405{ 406 return new BufferSparseResidencyInstance(context, m_bufferSize, m_useDeviceGroups); 407} 408 409} // anonymous ns 410 411void addBufferSparseResidencyTests(tcu::TestCaseGroup* group, const bool useDeviceGroups) 412{ 413 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440, useDeviceGroups)); 414 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440, useDeviceGroups)); 415 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440, useDeviceGroups)); 416 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440, useDeviceGroups)); 417 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440, useDeviceGroups)); 418 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440, useDeviceGroups)); 419} 420 421} // sparse 422} // vkt 423