1/*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file  vktSparseResourcesImageSparseResidency.cpp
21 * \brief Sparse partially resident images tests
22 *//*--------------------------------------------------------------------*/
23
24#include "vktSparseResourcesBufferSparseBinding.hpp"
25#include "vktSparseResourcesTestsUtil.hpp"
26#include "vktSparseResourcesBase.hpp"
27#include "vktTestCaseUtil.hpp"
28
29#include "vkDefs.hpp"
30#include "vkRef.hpp"
31#include "vkRefUtil.hpp"
32#include "vkPlatform.hpp"
33#include "vkPrograms.hpp"
34#include "vkMemUtil.hpp"
35#include "vkBuilderUtil.hpp"
36#include "vkImageUtil.hpp"
37#include "vkQueryUtil.hpp"
38#include "vkTypeUtil.hpp"
39
40#include "deUniquePtr.hpp"
41#include "deStringUtil.hpp"
42
43#include <string>
44#include <vector>
45
46using namespace vk;
47
48namespace vkt
49{
50namespace sparse
51{
52namespace
53{
54
55const std::string getCoordStr  (const ImageType		imageType,
56								const std::string&	x,
57								const std::string&	y,
58								const std::string&	z)
59{
60	switch (imageType)
61	{
62		case IMAGE_TYPE_1D:
63		case IMAGE_TYPE_BUFFER:
64			return x;
65
66		case IMAGE_TYPE_1D_ARRAY:
67		case IMAGE_TYPE_2D:
68			return "ivec2(" + x + "," + y + ")";
69
70		case IMAGE_TYPE_2D_ARRAY:
71		case IMAGE_TYPE_3D:
72		case IMAGE_TYPE_CUBE:
73		case IMAGE_TYPE_CUBE_ARRAY:
74			return "ivec3(" + x + "," + y + "," + z + ")";
75
76		default:
77			DE_ASSERT(false);
78			return "";
79	}
80}
81
82tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
83{
84	tcu::UVec3 result;
85
86	result.x() = extent.width  / divisor.width  + ((extent.width  % divisor.width)  ? 1u : 0u);
87	result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
88	result.z() = extent.depth  / divisor.depth  + ((extent.depth  % divisor.depth)  ? 1u : 0u);
89
90	return result;
91}
92
93tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
94{
95	const deUint32		maxComputeWorkGroupInvocations	= 128u;
96	const tcu::UVec3	maxComputeWorkGroupSize			= tcu::UVec3(128u, 128u, 64u);
97
98	const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
99	const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations /  xWorkGroupSize);
100	const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
101
102	return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
103}
104
105class ImageSparseResidencyCase : public TestCase
106{
107public:
108					ImageSparseResidencyCase	(tcu::TestContext&			testCtx,
109												 const std::string&			name,
110												 const std::string&			description,
111												 const ImageType			imageType,
112												 const tcu::UVec3&			imageSize,
113												 const tcu::TextureFormat&	format,
114												 const glu::GLSLVersion		glslVersion,
115												 const bool					useDeviceGroups);
116
117	void			initPrograms				(SourceCollections&			sourceCollections) const;
118	TestInstance*	createInstance				(Context&					context) const;
119
120private:
121	const bool					m_useDeviceGroups;
122	const ImageType				m_imageType;
123	const tcu::UVec3			m_imageSize;
124	const tcu::TextureFormat	m_format;
125	const glu::GLSLVersion		m_glslVersion;
126};
127
128ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&			testCtx,
129													const std::string&			name,
130													const std::string&			description,
131													const ImageType				imageType,
132													const tcu::UVec3&			imageSize,
133													const tcu::TextureFormat&	format,
134													const glu::GLSLVersion		glslVersion,
135													const bool					useDeviceGroups)
136	: TestCase				(testCtx, name, description)
137	, m_useDeviceGroups		(useDeviceGroups)
138	, m_imageType			(imageType)
139	, m_imageSize			(imageSize)
140	, m_format				(format)
141	, m_glslVersion			(glslVersion)
142{
143}
144
145void ImageSparseResidencyCase::initPrograms (SourceCollections&	sourceCollections) const
146{
147	// Create compute program
148	const char* const versionDecl			= glu::getGLSLVersionDeclaration(m_glslVersion);
149	const std::string imageTypeStr			= getShaderImageType(m_format, m_imageType);
150	const std::string formatQualifierStr	= getShaderImageFormatQualifier(m_format);
151	const std::string formatDataStr			= getShaderImageDataType(m_format);
152	const tcu::UVec3  gridSize				= getShaderGridSize(m_imageType, m_imageSize);
153	const tcu::UVec3  workGroupSize			= computeWorkGroupSize(gridSize);
154
155	std::ostringstream src;
156	src << versionDecl << "\n"
157		<< "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
158		<< "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
159		<< "void main (void)\n"
160		<< "{\n"
161		<< "	if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
162		<< "	if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
163		<< "	if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
164		<< "	{\n"
165		<< "		imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
166		<< formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
167		<< "	}\n"
168		<< "}\n";
169
170	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
171}
172
173class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
174{
175public:
176					ImageSparseResidencyInstance(Context&									 context,
177												 const ImageType							 imageType,
178												 const tcu::UVec3&							 imageSize,
179												 const tcu::TextureFormat&					 format,
180												 const bool									 useDeviceGroups);
181
182
183	tcu::TestStatus	iterate						(void);
184
185private:
186	const bool					m_useDeviceGroups;
187	const ImageType				m_imageType;
188	const tcu::UVec3			m_imageSize;
189	const tcu::TextureFormat	m_format;
190};
191
192ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&					context,
193															const ImageType				imageType,
194															const tcu::UVec3&			imageSize,
195															const tcu::TextureFormat&	format,
196															const bool					useDeviceGroups)
197	: SparseResourcesBaseInstance	(context, useDeviceGroups)
198	, m_useDeviceGroups				(useDeviceGroups)
199	, m_imageType					(imageType)
200	, m_imageSize					(imageSize)
201	, m_format						(format)
202{
203}
204
205tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
206{
207	const InstanceInterface&			instance = m_context.getInstanceInterface();
208
209	{
210		// Create logical device supporting both sparse and compute queues
211		QueueRequirementsVec queueRequirements;
212		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
213		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
214
215		createDeviceSupportingQueues(queueRequirements);
216	}
217
218	VkImageCreateInfo					imageCreateInfo;
219	VkSparseImageMemoryRequirements		aspectRequirements;
220	VkExtent3D							imageGranularity;
221	std::vector<DeviceMemorySp>			deviceMemUniquePtrVec;
222
223	const DeviceInterface&	deviceInterface	= getDeviceInterface();
224	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
225	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
226
227	// Go through all physical devices
228	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
229	{
230		const deUint32						firstDeviceID				= physDevID;
231		const deUint32						secondDeviceID				= (firstDeviceID + 1) % m_numPhysicalDevices;
232
233		const VkPhysicalDevice				physicalDevice				= getPhysicalDevice(firstDeviceID);
234		const VkPhysicalDeviceProperties	physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);
235
236		// Check if image size does not exceed device limits
237		if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
238			TCU_THROW(NotSupportedError, "Image size not supported for device");
239
240		// Check if device supports sparse operations for image type
241		if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
242			TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
243
244		imageCreateInfo.sType					= VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
245		imageCreateInfo.pNext					= DE_NULL;
246		imageCreateInfo.flags					= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
247		imageCreateInfo.imageType				= mapImageType(m_imageType);
248		imageCreateInfo.format					= mapTextureFormat(m_format);
249		imageCreateInfo.extent					= makeExtent3D(getLayerSize(m_imageType, m_imageSize));
250		imageCreateInfo.mipLevels				= 1u;
251		imageCreateInfo.arrayLayers				= getNumLayers(m_imageType, m_imageSize);
252		imageCreateInfo.samples					= VK_SAMPLE_COUNT_1_BIT;
253		imageCreateInfo.tiling					= VK_IMAGE_TILING_OPTIMAL;
254		imageCreateInfo.initialLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
255		imageCreateInfo.usage					= VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
256												  VK_IMAGE_USAGE_STORAGE_BIT;
257		imageCreateInfo.sharingMode				= VK_SHARING_MODE_EXCLUSIVE;
258		imageCreateInfo.queueFamilyIndexCount	= 0u;
259		imageCreateInfo.pQueueFamilyIndices		= DE_NULL;
260
261		if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
262		{
263			imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
264		}
265
266		// Check if device supports sparse operations for image format
267		if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
268			TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
269
270		// Create sparse image
271		const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));
272
273		// Create sparse image memory bind semaphore
274		const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
275
276		{
277			// Get image general memory requirements
278			const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
279
280			if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
281				TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
282
283			DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
284
285			// Get sparse image sparse memory requirements
286			const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
287
288			DE_ASSERT(sparseMemoryRequirements.size() != 0);
289
290			const deUint32 colorAspectIndex		= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
291			const deUint32 metadataAspectIndex	= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);
292
293			if (colorAspectIndex == NO_MATCH_FOUND)
294				TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
295
296			aspectRequirements	= sparseMemoryRequirements[colorAspectIndex];
297			imageGranularity	= aspectRequirements.formatProperties.imageGranularity;
298
299			const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
300
301			DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
302
303			std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
304			std::vector<VkSparseMemoryBind>		 imageMipTailMemoryBinds;
305
306			const deUint32						 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
307
308			if (memoryType == NO_MATCH_FOUND)
309				return tcu::TestStatus::fail("No matching memory type found");
310
311			// Bind device memory for each aspect
312			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
313			{
314				for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
315				{
316					const VkImageSubresource subresource		= { aspectMask, mipLevelNdx, layerNdx };
317					const VkExtent3D		 mipExtent			= mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
318					const tcu::UVec3		 numSparseBinds		= alignedDivide(mipExtent, imageGranularity);
319					const tcu::UVec3		 lastBlockExtent	= tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width   % imageGranularity.width  : imageGranularity.width,
320																			 mipExtent.height % imageGranularity.height ? mipExtent.height  % imageGranularity.height : imageGranularity.height,
321																			 mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth   % imageGranularity.depth  : imageGranularity.depth);
322					for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
323					for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
324					for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
325					{
326						const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
327
328						if (linearIndex % 2u == 1u)
329						{
330							continue;
331						}
332
333						VkOffset3D offset;
334						offset.x = x*imageGranularity.width;
335						offset.y = y*imageGranularity.height;
336						offset.z = z*imageGranularity.depth;
337
338						VkExtent3D extent;
339						extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
340						extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
341						extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
342
343						const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
344							imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);
345
346						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
347
348						imageResidencyMemoryBinds.push_back(imageMemoryBind);
349					}
350				}
351
352				if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
353				{
354					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
355						aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
356
357					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
358
359					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
360				}
361
362				// Metadata
363				if (metadataAspectIndex != NO_MATCH_FOUND)
364				{
365					const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
366
367					if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
368					{
369						const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
370							metadataAspectRequirements.imageMipTailSize, memoryType,
371							metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
372							VK_SPARSE_MEMORY_BIND_METADATA_BIT);
373
374						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
375
376						imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
377					}
378				}
379			}
380
381			if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
382			{
383				const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
384					aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
385
386				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
387
388				imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
389			}
390
391			// Metadata
392			if (metadataAspectIndex != NO_MATCH_FOUND)
393			{
394				const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
395
396				if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
397				{
398					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
399						metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
400						VK_SPARSE_MEMORY_BIND_METADATA_BIT);
401
402					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
403
404					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
405				}
406			}
407
408			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
409			{
410				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,	//VkStructureType							sType;
411				DE_NULL,												//const void*								pNext;
412				firstDeviceID,											//deUint32									resourceDeviceIndex;
413				secondDeviceID,											//deUint32									memoryDeviceIndex;
414			};
415
416			VkBindSparseInfo bindSparseInfo =
417			{
418				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
419				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
420				0u,														//deUint32									waitSemaphoreCount;
421				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
422				0u,														//deUint32									bufferBindCount;
423				DE_NULL,												//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
424				0u,														//deUint32									imageOpaqueBindCount;
425				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
426				0u,														//deUint32									imageBindCount;
427				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
428				1u,														//deUint32									signalSemaphoreCount;
429				&imageMemoryBindSemaphore.get()							//const VkSemaphore*						pSignalSemaphores;
430			};
431
432			VkSparseImageMemoryBindInfo		  imageResidencyBindInfo;
433			VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
434
435			if (imageResidencyMemoryBinds.size() > 0)
436			{
437				imageResidencyBindInfo.image		= *sparseImage;
438				imageResidencyBindInfo.bindCount	= static_cast<deUint32>(imageResidencyMemoryBinds.size());
439				imageResidencyBindInfo.pBinds		= &imageResidencyMemoryBinds[0];
440
441				bindSparseInfo.imageBindCount		= 1u;
442				bindSparseInfo.pImageBinds			= &imageResidencyBindInfo;
443			}
444
445			if (imageMipTailMemoryBinds.size() > 0)
446			{
447				imageMipTailBindInfo.image			= *sparseImage;
448				imageMipTailBindInfo.bindCount		= static_cast<deUint32>(imageMipTailMemoryBinds.size());
449				imageMipTailBindInfo.pBinds			= &imageMipTailMemoryBinds[0];
450
451				bindSparseInfo.imageOpaqueBindCount = 1u;
452				bindSparseInfo.pImageOpaqueBinds	= &imageMipTailBindInfo;
453			}
454
455			// Submit sparse bind commands for execution
456			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
457		}
458
459		// Create command buffer for compute and transfer oparations
460		const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
461		const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
462
463		// Start recording commands
464		beginCommandBuffer(deviceInterface, *commandBuffer);
465
466		// Create descriptor set layout
467		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
468			DescriptorSetLayoutBuilder()
469			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
470			.build(deviceInterface, getDevice()));
471
472		// Create and bind compute pipeline
473		const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
474		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
475		const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
476
477		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
478
479		// Create and bind descriptor set
480		const Unique<VkDescriptorPool> descriptorPool(
481			DescriptorPoolBuilder()
482			.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
483			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
484
485		const Unique<VkDescriptorSet>	descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
486
487		const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
488		const Unique<VkImageView>		imageView(makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
489		const VkDescriptorImageInfo		sparseImageInfo  = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
490
491		DescriptorSetUpdateBuilder()
492			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
493			.update(deviceInterface, getDevice());
494
495		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
496
497		{
498			const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
499			(
500				0u,
501				VK_ACCESS_SHADER_WRITE_BIT,
502				VK_IMAGE_LAYOUT_UNDEFINED,
503				VK_IMAGE_LAYOUT_GENERAL,
504				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
505				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
506				*sparseImage,
507				subresourceRange
508			);
509
510			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
511		}
512
513		const tcu::UVec3  gridSize = getShaderGridSize(m_imageType, m_imageSize);
514
515		{
516			const tcu::UVec3  workGroupSize = computeWorkGroupSize(gridSize);
517
518			const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
519			const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
520			const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);
521
522			const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
523
524			if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
525				maxComputeWorkGroupCount.y() < yWorkGroupCount ||
526				maxComputeWorkGroupCount.z() < zWorkGroupCount)
527			{
528				TCU_THROW(NotSupportedError, "Image size is not supported");
529			}
530
531			deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
532		}
533
534		{
535			const VkImageMemoryBarrier sparseImageTrasferBarrier = makeImageMemoryBarrier
536			(
537				VK_ACCESS_SHADER_WRITE_BIT,
538				VK_ACCESS_TRANSFER_READ_BIT,
539				VK_IMAGE_LAYOUT_GENERAL,
540				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
541				*sparseImage,
542				subresourceRange
543			);
544
545			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTrasferBarrier);
546		}
547
548		const deUint32					imageSizeInBytes		= getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
549		const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
550		const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
551		const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
552
553		{
554			const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);
555
556			deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
557		}
558
559		{
560			const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
561			(
562				VK_ACCESS_TRANSFER_WRITE_BIT,
563				VK_ACCESS_HOST_READ_BIT,
564				*outputBuffer,
565				0u,
566				imageSizeInBytes
567			);
568
569			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
570		}
571
572		// End recording commands
573		endCommandBuffer(deviceInterface, *commandBuffer);
574
575		// The stage at which execution is going to wait for finish of sparse binding operations
576		const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
577
578		// Submit commands for execution and wait for completion
579		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits,
580			0, DE_NULL, m_useDeviceGroups, firstDeviceID);
581
582		// Retrieve data from buffer to host memory
583		invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), imageSizeInBytes);
584
585		const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
586		const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);
587
588		// Wait for sparse queue to become idle
589		//vsk fails:
590		deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
591
592		// Validate results
593		if( aspectRequirements.imageMipTailFirstLod > 0u )
594		{
595			const VkExtent3D		 mipExtent		 = mipLevelExtents(imageCreateInfo.extent, 0u);
596			const tcu::UVec3		 numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
597			const tcu::UVec3		 lastBlockExtent = tcu::UVec3(	mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
598																	mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
599																	mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);
600
601			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
602			{
603				for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
604				for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
605				for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
606				{
607					VkExtent3D offset;
608					offset.width  = x*imageGranularity.width;
609					offset.height = y*imageGranularity.height;
610					offset.depth  = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;
611
612					VkExtent3D extent;
613					extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
614					extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
615					extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
616
617					const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
618
619					if (linearIndex % 2u == 0u)
620					{
621						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
622						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
623						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
624						{
625							const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
626							const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
627
628							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
629								return tcu::TestStatus::fail("Failed");
630						}
631					}
632					else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
633					{
634						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
635						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
636						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
637						{
638							const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
639							const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
640
641							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
642								return tcu::TestStatus::fail("Failed");
643						}
644					}
645				}
646			}
647		}
648		else
649		{
650			const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
651
652			for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
653			for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
654			for (deUint32 offsetX = 0u; offsetX < mipExtent.width;  ++offsetX)
655			{
656				const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
657				const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
658
659				if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
660					return tcu::TestStatus::fail("Failed");
661			}
662		}
663	}
664
665	return tcu::TestStatus::pass("Passed");
666}
667
668TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
669{
670	return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
671}
672
673} // anonymous ns
674
675tcu::TestCaseGroup* createImageSparseResidencyTestsCommon (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup> testGroup, const bool useDeviceGroup = false)
676{
677	static const deUint32 sizeCountPerImageType = 3u;
678
679	struct ImageParameters
680	{
681		ImageType	imageType;
682		tcu::UVec3	imageSizes[sizeCountPerImageType];
683	};
684
685	static const ImageParameters imageParametersArray[] =
686	{
687		{ IMAGE_TYPE_2D,		 { tcu::UVec3(512u, 256u, 1u),  tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u,  137u, 1u) } },
688		{ IMAGE_TYPE_2D_ARRAY,	 { tcu::UVec3(512u, 256u, 6u),	tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } },
689		{ IMAGE_TYPE_CUBE,		 { tcu::UVec3(256u, 256u, 1u),	tcu::UVec3(128u,  128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
690		{ IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u),	tcu::UVec3(128u,  128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
691		{ IMAGE_TYPE_3D,		 { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } }
692	};
693
694	static const tcu::TextureFormat formats[] =
695	{
696		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT32),
697		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT16),
698		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT8),
699		tcu::TextureFormat(tcu::TextureFormat::RG,	 tcu::TextureFormat::SIGNED_INT32),
700		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT16),
701		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT8),
702		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
703		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
704		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
705	};
706
707	for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
708	{
709		const ImageType					imageType = imageParametersArray[imageTypeNdx].imageType;
710		de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
711
712		for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
713		{
714			const tcu::TextureFormat&		format = formats[formatNdx];
715			de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
716
717			for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
718			{
719				const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
720
721				std::ostringstream stream;
722				stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
723
724				formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440, useDeviceGroup));
725			}
726			imageTypeGroup->addChild(formatGroup.release());
727		}
728		testGroup->addChild(imageTypeGroup.release());
729	}
730
731	return testGroup.release();
732}
733
734tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
735{
736	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Buffer Sparse Residency"));
737	return createImageSparseResidencyTestsCommon(testCtx, testGroup);
738}
739
740tcu::TestCaseGroup* createDeviceGroupImageSparseResidencyTests (tcu::TestContext& testCtx)
741{
742	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency", "Buffer Sparse Residency"));
743	return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
744}
745
746} // sparse
747} // vkt
748