/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesImageMemoryAliasing.cpp
 * \brief Sparse image memory aliasing tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesImageMemoryAliasing.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "tcuTexture.hpp"
#include "deMath.h"
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

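// Shader constant: the compute shaders write (texel index % MODULO_DIVISOR) into the texels
// they fill, and the verification code below recomputes the same values on the host.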
enum ShaderParameters
{
	MODULO_DIVISOR = 128
};

const std::string getCoordStr  (const ImageType		imageType,
								const std::string&	x,
								const std::string&	y,
								const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;

		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return "ivec2(" + x + "," + y + ")";

		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return "ivec3(" + x + "," + y + "," + z + ")";

		default:
			DE_ASSERT(false);
			return "";
	}
}

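// Divide extent by divisor component-wise, rounding up. Used to compute how many sparse
// blocks of a given granularity are needed to cover a mip level.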
tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
{
	tcu::UVec3 result;

	result.x() = extent.width  / divisor.width  + ((extent.width  % divisor.width)  ? 1u : 0u);
	result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
	result.z() = extent.depth  / divisor.depth  + ((extent.depth  % divisor.depth)  ? 1u : 0u);

	return result;
}

class ImageSparseMemoryAliasingCase : public TestCase
{
public:
					ImageSparseMemoryAliasingCase	(tcu::TestContext&			testCtx,
													 const std::string&			name,
													 const std::string&			description,
													 const ImageType			imageType,
													 const tcu::UVec3&			imageSize,
													 const tcu::TextureFormat&	format,
													 const glu::GLSLVersion		glslVersion);

	void			initPrograms					(SourceCollections&			sourceCollections) const;
	TestInstance*	createInstance					(Context&					context) const;

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
	const glu::GLSLVersion		m_glslVersion;
};

ImageSparseMemoryAliasingCase::ImageSparseMemoryAliasingCase (tcu::TestContext&			testCtx,
															  const std::string&		name,
															  const std::string&		description,
															  const ImageType			imageType,
															  const tcu::UVec3&			imageSize,
															  const tcu::TextureFormat&	format,
															  const glu::GLSLVersion	glslVersion)
	: TestCase				(testCtx, name, description)
	, m_imageType			(imageType)
	, m_imageSize			(imageSize)
	, m_format				(format)
	, m_glslVersion			(glslVersion)
{
}

class ImageSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
{
public:
					ImageSparseMemoryAliasingInstance	(Context&								context,
														 const ImageType						imageType,
														 const tcu::UVec3&						imageSize,
														 const tcu::TextureFormat&				format);

	tcu::TestStatus	iterate								(void);

private:
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;
};

ImageSparseMemoryAliasingInstance::ImageSparseMemoryAliasingInstance (Context&					context,
																	  const ImageType			imageType,
																	  const tcu::UVec3&			imageSize,
																	  const tcu::TextureFormat&	format)
	: SparseResourcesBaseInstance	(context)
	, m_imageType					(imageType)
	, m_imageSize					(imageSize)
	, m_format						(format)
{
}

tcu::TestStatus ImageSparseMemoryAliasingInstance::iterate (void)
{
	const InstanceInterface&			instance				= m_context.getInstanceInterface();
	const VkPhysicalDevice				physicalDevice			= m_context.getPhysicalDevice();
	const tcu::UVec3					maxWorkGroupSize		= tcu::UVec3(128u, 128u, 64u);
	const tcu::UVec3					maxWorkGroupCount		= tcu::UVec3(65535u, 65535u, 65535u);
	const deUint32						maxWorkGroupInvocations	= 128u;
	VkImageCreateInfo					imageSparseInfo;
	VkSparseImageMemoryRequirements		aspectRequirements;
	std::vector<DeviceMemorySp>			deviceMemUniquePtrVec;

	// Check if image size does not exceed device limits
	if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
		TCU_THROW(NotSupportedError, "Image size not supported for device");

	// Check if sparse memory aliasing is supported
	if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyAliased)
		TCU_THROW(NotSupportedError, "Sparse memory aliasing not supported");

	// Check if device supports sparse operations for image type
	if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
		TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

	imageSparseInfo.sType					= VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
	imageSparseInfo.pNext					= DE_NULL;
	imageSparseInfo.flags					= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
											  VK_IMAGE_CREATE_SPARSE_ALIASED_BIT   |
											  VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
	imageSparseInfo.imageType				= mapImageType(m_imageType);
	imageSparseInfo.format					= mapTextureFormat(m_format);
	imageSparseInfo.extent					= makeExtent3D(getLayerSize(m_imageType, m_imageSize));
	imageSparseInfo.arrayLayers				= getNumLayers(m_imageType, m_imageSize);
	imageSparseInfo.samples					= VK_SAMPLE_COUNT_1_BIT;
	imageSparseInfo.tiling					= VK_IMAGE_TILING_OPTIMAL;
	imageSparseInfo.initialLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
	imageSparseInfo.usage					= VK_IMAGE_USAGE_TRANSFER_DST_BIT |
											  VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
											  VK_IMAGE_USAGE_STORAGE_BIT;
	imageSparseInfo.sharingMode				= VK_SHARING_MODE_EXCLUSIVE;
	imageSparseInfo.queueFamilyIndexCount	= 0u;
	imageSparseInfo.pQueueFamilyIndices		= DE_NULL;

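	// 2D images backing cube (array) views must be created with the cube-compatible flag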
	if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
		imageSparseInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;

	{
		// Assign maximum allowed mipmap levels to image
		VkImageFormatProperties imageFormatProperties;
		instance.getPhysicalDeviceImageFormatProperties(physicalDevice,
			imageSparseInfo.format,
			imageSparseInfo.imageType,
			imageSparseInfo.tiling,
			imageSparseInfo.usage,
			imageSparseInfo.flags,
			&imageFormatProperties);

		imageSparseInfo.mipLevels = getImageMaxMipLevels(imageFormatProperties, imageSparseInfo.extent);
	}

	// Check if device supports sparse operations for image format
	if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageSparseInfo))
		TCU_THROW(NotSupportedError, "The image format does not support sparse operations");

	{
		// Create logical device supporting both sparse and compute queues
		QueueRequirementsVec queueRequirements;
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

		createDeviceSupportingQueues(queueRequirements);
	}

	const DeviceInterface&	deviceInterface	= getDeviceInterface();
	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);

	// Create two sparse images with identical creation parameters
	const Unique<VkImage> imageRead(createImage(deviceInterface, getDevice(), &imageSparseInfo));
	const Unique<VkImage> imageWrite(createImage(deviceInterface, getDevice(), &imageSparseInfo));

	// Create semaphores to synchronize sparse binding operations with other operations on the sparse images
	const Unique<VkSemaphore> memoryBindSemaphoreTransfer(makeSemaphore(deviceInterface, getDevice()));
	const Unique<VkSemaphore> memoryBindSemaphoreCompute(makeSemaphore(deviceInterface, getDevice()));

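	// Both semaphores are signaled by the sparse binding operation and waited on, at the transfer
	// and compute stages respectively, by the command buffer submission further below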
	const VkSemaphore imageMemoryBindSemaphores[] = { memoryBindSemaphoreTransfer.get(), memoryBindSemaphoreCompute.get() };

	{
		std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
		std::vector<VkSparseMemoryBind>		 imageReadMipTailBinds;
		std::vector<VkSparseMemoryBind>		 imageWriteMipTailBinds;

		// Get sparse image general memory requirements
		const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *imageRead);

		// Check if required image memory size does not exceed device limits
		if (imageMemoryRequirements.size > getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
			TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");

		DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);

		// Get sparse image sparse memory requirements
		const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *imageRead);

		DE_ASSERT(sparseMemoryRequirements.size() != 0);

		const deUint32 colorAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);

		if (colorAspectIndex == NO_MATCH_FOUND)
			TCU_THROW(NotSupportedError, "Image aspect not supported - the test currently supports only VK_IMAGE_ASPECT_COLOR_BIT");

		aspectRequirements = sparseMemoryRequirements[colorAspectIndex];

		const VkImageAspectFlags	aspectMask			= aspectRequirements.formatProperties.aspectMask;
		const VkExtent3D			imageGranularity	= aspectRequirements.formatProperties.imageGranularity;

		DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);

		const deUint32 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);

		if (memoryType == NO_MATCH_FOUND)
			return tcu::TestStatus::fail("No matching memory type found");

		// Bind memory for each layer
		for (deUint32 layerNdx = 0; layerNdx < imageSparseInfo.arrayLayers; ++layerNdx)
		{
			for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
			{
				const VkExtent3D			mipExtent		= mipLevelExtents(imageSparseInfo.extent, mipLevelNdx);
				const tcu::UVec3			sparseBlocks	= alignedDivide(mipExtent, imageGranularity);
				const deUint32				numSparseBlocks = sparseBlocks.x() * sparseBlocks.y() * sparseBlocks.z();
				const VkImageSubresource	subresource		= { aspectMask, mipLevelNdx, layerNdx };

				const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
					imageMemoryRequirements.alignment * numSparseBlocks, memoryType, subresource, makeOffset3D(0u, 0u, 0u), mipExtent);

				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

				imageResidencyMemoryBinds.push_back(imageMemoryBind);
			}

			if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
			{
				const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
					aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

				imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);

				const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
					aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

				imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
			}
		}

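		// With a single mip tail region, all array layers share one opaque binding per image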
		if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageSparseInfo.mipLevels)
		{
			const VkSparseMemoryBind imageReadMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
				aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);

			deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageReadMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

			imageReadMipTailBinds.push_back(imageReadMipTailMemoryBind);

			const VkSparseMemoryBind imageWriteMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
				aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);

			deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageWriteMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

			imageWriteMipTailBinds.push_back(imageWriteMipTailMemoryBind);
		}

		VkBindSparseInfo bindSparseInfo =
		{
			VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,		//VkStructureType							sType;
			DE_NULL,								//const void*								pNext;
			0u,										//deUint32									waitSemaphoreCount;
			DE_NULL,								//const VkSemaphore*						pWaitSemaphores;
			0u,										//deUint32									bufferBindCount;
			DE_NULL,								//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
			0u,										//deUint32									imageOpaqueBindCount;
			DE_NULL,								//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
			0u,										//deUint32									imageBindCount;
			DE_NULL,								//const VkSparseImageMemoryBindInfo*		pImageBinds;
			2u,										//deUint32									signalSemaphoreCount;
			imageMemoryBindSemaphores				//const VkSemaphore*						pSignalSemaphores;
		};

		VkSparseImageMemoryBindInfo		  imageResidencyBindInfo[2];
		VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo[2];

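		// The aliasing under test: the very same memory binds (imageResidencyMemoryBinds) are
		// applied to both imageRead and imageWrite, so the two images share physical memory for
		// all mip levels below the mip tail. The mip tails received separate allocations above.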
		if (imageResidencyMemoryBinds.size() > 0)
		{
			imageResidencyBindInfo[0].image		= *imageRead;
			imageResidencyBindInfo[0].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
			imageResidencyBindInfo[0].pBinds	= &imageResidencyMemoryBinds[0];

			imageResidencyBindInfo[1].image		= *imageWrite;
			imageResidencyBindInfo[1].bindCount = static_cast<deUint32>(imageResidencyMemoryBinds.size());
			imageResidencyBindInfo[1].pBinds	= &imageResidencyMemoryBinds[0];

			bindSparseInfo.imageBindCount		= 2u;
			bindSparseInfo.pImageBinds			= imageResidencyBindInfo;
		}

		if (imageReadMipTailBinds.size() > 0)
		{
			imageMipTailBindInfo[0].image		= *imageRead;
			imageMipTailBindInfo[0].bindCount	= static_cast<deUint32>(imageReadMipTailBinds.size());
			imageMipTailBindInfo[0].pBinds		= &imageReadMipTailBinds[0];

			imageMipTailBindInfo[1].image		= *imageWrite;
			imageMipTailBindInfo[1].bindCount	= static_cast<deUint32>(imageWriteMipTailBinds.size());
			imageMipTailBindInfo[1].pBinds		= &imageWriteMipTailBinds[0];

			bindSparseInfo.imageOpaqueBindCount = 2u;
			bindSparseInfo.pImageOpaqueBinds	= imageMipTailBindInfo;
		}

		// Submit sparse bind commands for execution
		VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
	}

	// Create command buffer for compute and transfer operations
	const Unique<VkCommandPool>	  commandPool  (makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
	const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, getDevice(), *commandPool));

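	// Describe one tightly packed buffer region per mip level for the buffer<->image copies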
	std::vector<VkBufferImageCopy> bufferImageCopy(imageSparseInfo.mipLevels);

	{
		deUint32 bufferOffset = 0u;
		for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
		{
			bufferImageCopy[mipLevelNdx] = makeBufferImageCopy(mipLevelExtents(imageSparseInfo.extent, mipLevelNdx), imageSparseInfo.arrayLayers, mipLevelNdx, bufferOffset);
			bufferOffset += getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
		}
	}

	// Start recording commands
	beginCommandBuffer(deviceInterface, *commandBuffer);

	const deUint32					imageSizeInBytes		= getImageSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, imageSparseInfo.mipLevels, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
	const VkBufferCreateInfo		inputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
	const Unique<VkBuffer>			inputBuffer				(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
	const de::UniquePtr<Allocation>	inputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));

	std::vector<deUint8> referenceData(imageSizeInBytes);

	for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
	{
		const deUint32 mipLevelSizeInBytes	= getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
		const deUint32 bufferOffset			= static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);

		deMemset(&referenceData[bufferOffset], mipLevelNdx + 1u, mipLevelSizeInBytes);
	}

	deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], imageSizeInBytes);

	flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), imageSizeInBytes);

	{
		const VkBufferMemoryBarrier inputBufferBarrier = makeBufferMemoryBarrier
		(
			VK_ACCESS_HOST_WRITE_BIT,
			VK_ACCESS_TRANSFER_READ_BIT,
			*inputBuffer,
			0u,
			imageSizeInBytes
		);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
	}

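	// Transition imageRead to TRANSFER_DST_OPTIMAL; when the sparse and compute queue families
	// differ, the barrier also transfers ownership to the compute queue family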
	{
		const VkImageMemoryBarrier imageSparseTransferDstBarrier = makeImageMemoryBarrier
		(
			0u,
			VK_ACCESS_TRANSFER_WRITE_BIT,
			VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
			sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex  : VK_QUEUE_FAMILY_IGNORED,
			sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
			*imageRead,
			makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
		);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferDstBarrier);
	}

	deviceInterface.cmdCopyBufferToImage(*commandBuffer, *inputBuffer, *imageRead, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);

	{
		const VkImageMemoryBarrier imageSparseTransferSrcBarrier = makeImageMemoryBarrier
		(
			VK_ACCESS_TRANSFER_WRITE_BIT,
			VK_ACCESS_TRANSFER_READ_BIT,
			VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
			*imageRead,
			makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
		);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferSrcBarrier);
	}

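	// Transition imageWrite to GENERAL for storage writes. Since the two images alias the same
	// memory, the compute shader writes below replace the data just copied into imageRead.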
	{
		const VkImageMemoryBarrier imageSparseShaderStorageBarrier = makeImageMemoryBarrier
		(
			0u,
			VK_ACCESS_SHADER_WRITE_BIT,
			VK_IMAGE_LAYOUT_UNDEFINED,
			VK_IMAGE_LAYOUT_GENERAL,
			*imageWrite,
			makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, imageSparseInfo.mipLevels, 0u, imageSparseInfo.arrayLayers)
		);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseShaderStorageBarrier);
	}

	// Create descriptor set layout
	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, getDevice()));

	Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));

	Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, imageSparseInfo.mipLevels)
		.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, imageSparseInfo.mipLevels));

	typedef de::SharedPtr< Unique<VkImageView> >		SharedVkImageView;
	std::vector<SharedVkImageView>						imageViews;
	imageViews.resize(imageSparseInfo.mipLevels);

	typedef de::SharedPtr< Unique<VkDescriptorSet> >	SharedVkDescriptorSet;
	std::vector<SharedVkDescriptorSet>					descriptorSets;
	descriptorSets.resize(imageSparseInfo.mipLevels);

	typedef de::SharedPtr< Unique<VkPipeline> >			SharedVkPipeline;
	std::vector<SharedVkPipeline>						computePipelines;
	computePipelines.resize(imageSparseInfo.mipLevels);

	for (deUint32 mipLevelNdx = 0u; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
	{
		std::ostringstream name;
		name << "comp" << mipLevelNdx;

		// Create and bind compute pipeline
		Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get(name.str()), DE_NULL));

		computePipelines[mipLevelNdx]	= makeVkSharedPtr(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
		VkPipeline computePipeline		= **computePipelines[mipLevelNdx];

		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);

		// Create and bind descriptor set
		descriptorSets[mipLevelNdx]		= makeVkSharedPtr(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
		VkDescriptorSet descriptorSet	= **descriptorSets[mipLevelNdx];

		// Select which mipmap level to bind
		const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, mipLevelNdx, 1u, 0u, imageSparseInfo.arrayLayers);

		imageViews[mipLevelNdx] = makeVkSharedPtr(makeImageView(deviceInterface, getDevice(), *imageWrite, mapImageViewType(m_imageType), imageSparseInfo.format, subresourceRange));
		VkImageView imageView	= **imageViews[mipLevelNdx];

		const VkDescriptorImageInfo sparseImageInfo = makeDescriptorImageInfo(DE_NULL, imageView, VK_IMAGE_LAYOUT_GENERAL);

		DescriptorSetUpdateBuilder()
			.writeSingle(descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
			.update(deviceInterface, getDevice());

		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet, 0u, DE_NULL);

		const tcu::UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
		const deUint32		xWorkGroupSize		= std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
		const deUint32		yWorkGroupSize		= std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
		const deUint32		zWorkGroupSize		= std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

		const deUint32		xWorkGroupCount		= gridSize.x() / xWorkGroupSize + (gridSize.x() % xWorkGroupSize ? 1u : 0u);
		const deUint32		yWorkGroupCount		= gridSize.y() / yWorkGroupSize + (gridSize.y() % yWorkGroupSize ? 1u : 0u);
		const deUint32		zWorkGroupCount		= gridSize.z() / zWorkGroupSize + (gridSize.z() % zWorkGroupSize ? 1u : 0u);

		if (maxWorkGroupCount.x() < xWorkGroupCount ||
			maxWorkGroupCount.y() < yWorkGroupCount ||
			maxWorkGroupCount.z() < zWorkGroupCount)
			TCU_THROW(NotSupportedError, "Image size is not supported");

		deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
	}

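	// Make the shader writes available and visible to the transfer stage before reading the
	// aliased data back through imageRead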
	{
		const VkMemoryBarrier memoryBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 1u, &memoryBarrier, 0u, DE_NULL, 0u, DE_NULL);
	}

	const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
	const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

	deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageRead, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, static_cast<deUint32>(bufferImageCopy.size()), &bufferImageCopy[0]);

	{
		const VkBufferMemoryBarrier outputBufferBarrier = makeBufferMemoryBarrier
		(
			VK_ACCESS_TRANSFER_WRITE_BIT,
			VK_ACCESS_HOST_READ_BIT,
			*outputBuffer,
			0u,
			imageSizeInBytes
		);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
	}

	// End recording commands
	endCommandBuffer(deviceInterface, *commandBuffer);

	const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };

	// Submit commands for execution and wait for completion
	submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 2u, imageMemoryBindSemaphores, stageBits);

	// Retrieve data from buffer to host memory
	invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), imageSizeInBytes);

	const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());

	// Wait for sparse queue to become idle
	deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

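	// Levels below the mip tail alias memory between the two images: reading through imageRead
	// must return exactly what the compute shaders stored through imageWrite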
	for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
	{
		const tcu::UVec3				  gridSize		= getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
		const deUint32					  bufferOffset	= static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);
		const tcu::ConstPixelBufferAccess pixelBuffer	= tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData + bufferOffset);

		for (deUint32 offsetZ = 0u; offsetZ < gridSize.z(); ++offsetZ)
		for (deUint32 offsetY = 0u; offsetY < gridSize.y(); ++offsetY)
		for (deUint32 offsetX = 0u; offsetX < gridSize.x(); ++offsetX)
		{
			const deUint32 index			= offsetX + (offsetY + offsetZ * gridSize.y()) * gridSize.x();
			const tcu::UVec4 referenceValue = tcu::UVec4(index % MODULO_DIVISOR, index % MODULO_DIVISOR, index % MODULO_DIVISOR, 1u);
			const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);

			if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
				return tcu::TestStatus::fail("Failed");
		}
	}

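	// Mip tail levels were bound to separate allocations for each image, so imageRead must
	// still contain the reference data uploaded with the buffer-to-image copy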
	for (deUint32 mipLevelNdx = aspectRequirements.imageMipTailFirstLod; mipLevelNdx < imageSparseInfo.mipLevels; ++mipLevelNdx)
	{
		const deUint32 mipLevelSizeInBytes	= getImageMipLevelSizeInBytes(imageSparseInfo.extent, imageSparseInfo.arrayLayers, m_format, mipLevelNdx);
		const deUint32 bufferOffset			= static_cast<deUint32>(bufferImageCopy[mipLevelNdx].bufferOffset);

		if (deMemCmp(outputData + bufferOffset, &referenceData[bufferOffset], mipLevelSizeInBytes) != 0)
			return tcu::TestStatus::fail("Failed");
	}

	return tcu::TestStatus::pass("Passed");
}

void ImageSparseMemoryAliasingCase::initPrograms(SourceCollections&	sourceCollections) const
{
	const char* const	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);
	const std::string	imageTypeStr			= getShaderImageType(m_format, m_imageType);
	const std::string	formatQualifierStr		= getShaderImageFormatQualifier(m_format);
	const std::string	formatDataStr			= getShaderImageDataType(m_format);
	const deUint32		maxWorkGroupInvocations = 128u;
	const tcu::UVec3	maxWorkGroupSize		= tcu::UVec3(128u, 128u, 64u);

	const tcu::UVec3	layerSize				= getLayerSize(m_imageType, m_imageSize);
	const deUint32		widestEdge				= std::max(std::max(layerSize.x(), layerSize.y()), layerSize.z());
	const deUint32		mipLevels				= static_cast<deUint32>(deFloatLog2(static_cast<float>(widestEdge))) + 1u;

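	// Generate one compute shader per mip level; each writes (index % MODULO_DIVISOR) to the
	// first three channels and 1 to the fourth channel of every texel in its level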
	for (deUint32 mipLevelNdx = 0; mipLevelNdx < mipLevels; ++mipLevelNdx)
	{
		// Create compute program
		const tcu::UVec3	gridSize		= getShaderGridSize(m_imageType, m_imageSize, mipLevelNdx);
		const deUint32		xWorkGroupSize  = std::min(std::min(gridSize.x(), maxWorkGroupSize.x()), maxWorkGroupInvocations);
		const deUint32		yWorkGroupSize  = std::min(std::min(gridSize.y(), maxWorkGroupSize.y()), maxWorkGroupInvocations / xWorkGroupSize);
		const deUint32		zWorkGroupSize  = std::min(std::min(gridSize.z(), maxWorkGroupSize.z()), maxWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

		std::ostringstream src;

		src << versionDecl << "\n"
			<< "layout (local_size_x = " << xWorkGroupSize << ", local_size_y = " << yWorkGroupSize << ", local_size_z = " << zWorkGroupSize << ") in; \n"
			<< "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
			<< "void main (void)\n"
			<< "{\n"
			<< "	if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
			<< "	if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
			<< "	if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
			<< "	{\n"
			<< "		int index = int(gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + gl_GlobalInvocationID.z*" << gridSize.y() << ")*" << gridSize.x() << ");\n"
			<< "		imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
			<< formatDataStr << "( index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", index % " << MODULO_DIVISOR << ", 1 )); \n"
			<< "	}\n"
			<< "}\n";

		std::ostringstream name;
		name << "comp" << mipLevelNdx;
		sourceCollections.glslSources.add(name.str()) << glu::ComputeSource(src.str());
	}
}

TestInstance* ImageSparseMemoryAliasingCase::createInstance (Context& context) const
{
	return new ImageSparseMemoryAliasingInstance(context, m_imageType, m_imageSize, m_format);
}

} // anonymous ns

tcu::TestCaseGroup* createImageSparseMemoryAliasingTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_memory_aliasing", "Sparse Image Memory Aliasing"));

	static const deUint32 sizeCountPerImageType = 4u;

	struct ImageParameters
	{
		ImageType	imageType;
		tcu::UVec3	imageSizes[sizeCountPerImageType];
	};

	static const ImageParameters imageParametersArray[] =
	{
		{ IMAGE_TYPE_2D,		{ tcu::UVec3(512u, 256u, 1u),	tcu::UVec3(128u, 128u, 1u),	tcu::UVec3(503u, 137u, 1u),	tcu::UVec3(11u, 37u, 1u) } },
		{ IMAGE_TYPE_2D_ARRAY,	{ tcu::UVec3(512u, 256u, 6u),	tcu::UVec3(128u, 128u, 8u),	tcu::UVec3(503u, 137u, 3u),	tcu::UVec3(11u, 37u, 3u) } },
		{ IMAGE_TYPE_CUBE,		{ tcu::UVec3(256u, 256u, 1u),	tcu::UVec3(128u, 128u, 1u),	tcu::UVec3(137u, 137u, 1u),	tcu::UVec3(11u, 11u, 1u) } },
		{ IMAGE_TYPE_CUBE_ARRAY,{ tcu::UVec3(256u, 256u, 6u),	tcu::UVec3(128u, 128u, 8u),	tcu::UVec3(137u, 137u, 3u),	tcu::UVec3(11u, 11u, 3u) } },
		{ IMAGE_TYPE_3D,		{ tcu::UVec3(256u, 256u, 16u),	tcu::UVec3(128u, 128u, 8u),	tcu::UVec3(503u, 137u, 3u),	tcu::UVec3(11u, 37u, 3u) } }
	};

	static const tcu::TextureFormat formats[] =
	{
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT16),
		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT8),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
	};

	for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
	{
		const ImageType					imageType = imageParametersArray[imageTypeNdx].imageType;
		de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));

		for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
		{
			const tcu::TextureFormat&		format = formats[formatNdx];
			de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));

			for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
			{
				const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];

				std::ostringstream stream;
				stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();

				formatGroup->addChild(new ImageSparseMemoryAliasingCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440));
			}
			imageTypeGroup->addChild(formatGroup.release());
		}
		testGroup->addChild(imageTypeGroup.release());
	}

	return testGroup.release();
}

} // sparse
} // vkt