tcuCompressedTexture.cpp revision 5f78b1323b6ef28d8b9cdce6fefcbbb61a0477a9
1/*-------------------------------------------------------------------------
2 * drawElements Quality Program Tester Core
3 * ----------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Compressed Texture Utilities.
22 *//*--------------------------------------------------------------------*/
23
24#include "tcuCompressedTexture.hpp"
25#include "tcuTextureUtil.hpp"
26
27#include "deStringUtil.hpp"
28#include "deFloat16.h"
29
30#include <algorithm>
31
32namespace tcu
33{
34
namespace
{

// Size of one compressed ASTC block: 128 bits expressed in bytes.
enum { ASTC_BLOCK_SIZE_BYTES = 128/8 };

// Compile-time type comparison: V is 1 when both template arguments name the
// same type (partial specialization below) and 0 otherwise (primary template).
template <typename First, typename Second>
struct isSameType
{
	enum { V = 0 };
};

template <typename Same>
struct isSameType<Same, Same>
{
	enum { V = 1 };
};

} // anonymous
46
47int getBlockSize (CompressedTexFormat format)
48{
49	if (isAstcFormat(format))
50	{
51		return ASTC_BLOCK_SIZE_BYTES;
52	}
53	else if (isEtcFormat(format))
54	{
55		switch (format)
56		{
57			case COMPRESSEDTEXFORMAT_ETC1_RGB8:							return 8;
58			case COMPRESSEDTEXFORMAT_EAC_R11:							return 8;
59			case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:					return 8;
60			case COMPRESSEDTEXFORMAT_EAC_RG11:							return 16;
61			case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:					return 16;
62			case COMPRESSEDTEXFORMAT_ETC2_RGB8:							return 8;
63			case COMPRESSEDTEXFORMAT_ETC2_SRGB8:						return 8;
64			case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		return 8;
65			case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	return 8;
66			case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:					return 16;
67			case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:				return 16;
68
69			default:
70				DE_ASSERT(false);
71				return -1;
72		}
73	}
74	else
75	{
76		DE_ASSERT(false);
77		return -1;
78	}
79}
80
81IVec3 getBlockPixelSize (CompressedTexFormat format)
82{
83	if (isEtcFormat(format))
84	{
85		return IVec3(4, 4, 1);
86	}
87	else if (isAstcFormat(format))
88	{
89		switch (format)
90		{
91			case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:				return IVec3(4,  4,  1);
92			case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:				return IVec3(5,  4,  1);
93			case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:				return IVec3(5,  5,  1);
94			case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:				return IVec3(6,  5,  1);
95			case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:				return IVec3(6,  6,  1);
96			case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:				return IVec3(8,  5,  1);
97			case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:				return IVec3(8,  6,  1);
98			case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:				return IVec3(8,  8,  1);
99			case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:			return IVec3(10, 5,  1);
100			case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:			return IVec3(10, 6,  1);
101			case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:			return IVec3(10, 8,  1);
102			case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:			return IVec3(10, 10, 1);
103			case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:			return IVec3(12, 10, 1);
104			case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:			return IVec3(12, 12, 1);
105			case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:		return IVec3(4,  4,  1);
106			case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:		return IVec3(5,  4,  1);
107			case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:		return IVec3(5,  5,  1);
108			case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:		return IVec3(6,  5,  1);
109			case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:		return IVec3(6,  6,  1);
110			case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:		return IVec3(8,  5,  1);
111			case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:		return IVec3(8,  6,  1);
112			case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:		return IVec3(8,  8,  1);
113			case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:	return IVec3(10, 5,  1);
114			case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:	return IVec3(10, 6,  1);
115			case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:	return IVec3(10, 8,  1);
116			case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:	return IVec3(10, 10, 1);
117			case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:	return IVec3(12, 10, 1);
118			case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:	return IVec3(12, 12, 1);
119
120			default:
121				DE_ASSERT(false);
122				return IVec3();
123		}
124	}
125	else
126	{
127		DE_ASSERT(false);
128		return IVec3(-1);
129	}
130}
131
132bool isEtcFormat (CompressedTexFormat format)
133{
134	switch (format)
135	{
136		case COMPRESSEDTEXFORMAT_ETC1_RGB8:
137		case COMPRESSEDTEXFORMAT_EAC_R11:
138		case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
139		case COMPRESSEDTEXFORMAT_EAC_RG11:
140		case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
141		case COMPRESSEDTEXFORMAT_ETC2_RGB8:
142		case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
143		case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
144		case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
145		case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
146		case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
147			return true;
148
149		default:
150			return false;
151	}
152}
153
154bool isAstcFormat (CompressedTexFormat format)
155{
156	switch (format)
157	{
158		case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
159		case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
160		case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
161		case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
162		case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
163		case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
164		case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
165		case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
166		case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
167		case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
168		case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
169		case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
170		case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
171		case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
172		case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
173		case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
174		case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
175		case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
176		case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
177		case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
178		case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
179		case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
180		case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
181		case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
182		case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
183		case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
184		case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
185		case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
186			return true;
187
188		default:
189			return false;
190	}
191}
192
193bool isAstcSRGBFormat (CompressedTexFormat format)
194{
195	switch (format)
196	{
197		case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
198		case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
199		case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
200		case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
201		case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
202		case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
203		case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
204		case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
205		case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
206		case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
207		case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
208		case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
209		case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
210		case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
211			return true;
212
213		default:
214			return false;
215	}
216}
217
218TextureFormat getUncompressedFormat (CompressedTexFormat format)
219{
220	if (isEtcFormat(format))
221	{
222		switch (format)
223		{
224			case COMPRESSEDTEXFORMAT_ETC1_RGB8:							return TextureFormat(TextureFormat::RGB,	TextureFormat::UNORM_INT8);
225			case COMPRESSEDTEXFORMAT_EAC_R11:							return TextureFormat(TextureFormat::R,		TextureFormat::UNORM_INT16);
226			case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:					return TextureFormat(TextureFormat::R,		TextureFormat::SNORM_INT16);
227			case COMPRESSEDTEXFORMAT_EAC_RG11:							return TextureFormat(TextureFormat::RG,		TextureFormat::UNORM_INT16);
228			case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:					return TextureFormat(TextureFormat::RG,		TextureFormat::SNORM_INT16);
229			case COMPRESSEDTEXFORMAT_ETC2_RGB8:							return TextureFormat(TextureFormat::RGB,	TextureFormat::UNORM_INT8);
230			case COMPRESSEDTEXFORMAT_ETC2_SRGB8:						return TextureFormat(TextureFormat::sRGB,	TextureFormat::UNORM_INT8);
231			case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		return TextureFormat(TextureFormat::RGBA,	TextureFormat::UNORM_INT8);
232			case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	return TextureFormat(TextureFormat::sRGBA,	TextureFormat::UNORM_INT8);
233			case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:					return TextureFormat(TextureFormat::RGBA,	TextureFormat::UNORM_INT8);
234			case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:				return TextureFormat(TextureFormat::sRGBA,	TextureFormat::UNORM_INT8);
235
236			default:
237				DE_ASSERT(false);
238				return TextureFormat();
239		}
240	}
241	else if (isAstcFormat(format))
242	{
243		if (isAstcSRGBFormat(format))
244			return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
245		else
246			return TextureFormat(TextureFormat::RGBA, TextureFormat::HALF_FLOAT);
247	}
248	else
249	{
250		DE_ASSERT(false);
251		return TextureFormat();
252	}
253}
254
255CompressedTexFormat getAstcFormatByBlockSize (const IVec3& size, bool isSRGB)
256{
257	if (size.z() > 1)
258		throw InternalError("3D ASTC textures not currently supported");
259
260	for (int fmtI = 0; fmtI < COMPRESSEDTEXFORMAT_LAST; fmtI++)
261	{
262		const CompressedTexFormat fmt = (CompressedTexFormat)fmtI;
263
264		if (isAstcFormat(fmt) && getBlockPixelSize(fmt) == size && isAstcSRGBFormat(fmt) == isSRGB)
265			return fmt;
266	}
267
268	throw InternalError("Invalid ASTC block size " + de::toString(size.x()) + "x" + de::toString(size.y()) + "x" + de::toString(size.z()));
269}
270
271namespace
272{
273
// Integer division a/b, incremented by one whenever the division leaves a non-zero remainder.
inline int divRoundUp (int a, int b)
{
	const int	quotient		= a / b;
	const bool	hasRemainder	= (a % b) != 0;

	return hasRemainder ? quotient + 1 : quotient;
}
278
279// \todo [2013-08-06 nuutti] ETC and ASTC decompression codes are rather unrelated, and are already in their own "private" namespaces - should this be split to multiple files?
280
281namespace EtcDecompressInternal
282{
283
enum
{
	// Block dimensions in pixels.
	ETC2_BLOCK_WIDTH					= 4,
	ETC2_BLOCK_HEIGHT					= 4,

	// For each uncompressed layout: bytes per pixel, followed by bytes per whole block.
	ETC2_UNCOMPRESSED_PIXEL_SIZE_A8		= 1,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_A8		= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8,

	ETC2_UNCOMPRESSED_PIXEL_SIZE_R11	= 2,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_R11	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11,

	ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11	= 4,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RG11	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11,

	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8	= 3,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8,

	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8	= 4,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGBA8	= ETC2_BLOCK_WIDTH*ETC2_BLOCK_HEIGHT*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8
};
299
300inline deUint64 get64BitBlock (const deUint8* src, int blockNdx)
301{
302	// Stored in big-endian form.
303	deUint64 block = 0;
304
305	for (int i = 0; i < 8; i++)
306		block = (block << 8ull) | (deUint64)(src[blockNdx*8+i]);
307
308	return block;
309}
310
311// Return the first 64 bits of a 128 bit block.
312inline deUint64 get128BitBlockStart (const deUint8* src, int blockNdx)
313{
314	return get64BitBlock(src, 2*blockNdx);
315}
316
317// Return the last 64 bits of a 128 bit block.
318inline deUint64 get128BitBlockEnd (const deUint8* src, int blockNdx)
319{
320	return get64BitBlock(src, 2*blockNdx + 1);
321}
322
323inline deUint32 getBit (deUint64 src, int bit)
324{
325	return (src >> bit) & 1;
326}
327
328inline deUint32 getBits (deUint64 src, int low, int high)
329{
330	const int numBits = (high-low) + 1;
331	DE_ASSERT(de::inRange(numBits, 1, 32));
332	return (src >> low) & ((1<<numBits)-1);
333}
334
335inline deUint8 extend4To8 (deUint8 src)
336{
337	DE_ASSERT((src & ~((1<<4)-1)) == 0);
338	return (src << 4) | src;
339}
340
341inline deUint8 extend5To8 (deUint8 src)
342{
343	DE_ASSERT((src & ~((1<<5)-1)) == 0);
344	return (src << 3) | (src >> 2);
345}
346
347inline deUint8 extend6To8 (deUint8 src)
348{
349	DE_ASSERT((src & ~((1<<6)-1)) == 0);
350	return (src << 2) | (src >> 4);
351}
352
353inline deUint8 extend7To8 (deUint8 src)
354{
355	DE_ASSERT((src & ~((1<<7)-1)) == 0);
356	return (src << 1) | (src >> 6);
357}
358
359inline deInt8 extendSigned3To8 (deUint8 src)
360{
361	const bool isNeg = (src & (1<<2)) != 0;
362	return (deInt8)((isNeg ? ~((1<<3)-1) : 0) | src);
363}
364
365inline deUint8 extend5Delta3To8 (deUint8 base5, deUint8 delta3)
366{
367	const deUint8 t = (deUint8)((deInt8)base5 + extendSigned3To8(delta3));
368	return extend5To8(t);
369}
370
371inline deUint16 extend11To16 (deUint16 src)
372{
373	DE_ASSERT((src & ~((1<<11)-1)) == 0);
374	return (src << 5) | (src >> 6);
375}
376
377inline deInt16 extend11To16WithSign (deInt16 src)
378{
379	if (src < 0)
380		return -(deInt16)extend11To16(-src);
381	else
382		return (deInt16)extend11To16(src);
383}
384
385void decompressETC1Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src)
386{
387	const int		diffBit		= (int)getBit(src, 33);
388	const int		flipBit		= (int)getBit(src, 32);
389	const deUint32	table[2]	= { getBits(src, 37, 39), getBits(src, 34, 36) };
390	deUint8			baseR[2];
391	deUint8			baseG[2];
392	deUint8			baseB[2];
393
394	if (diffBit == 0)
395	{
396		// Individual mode.
397		baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
398		baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
399		baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
400		baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
401		baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
402		baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
403	}
404	else
405	{
406		// Differential mode (diffBit == 1).
407		deUint8 bR = (deUint8)getBits(src, 59, 63); // 5b
408		deUint8 dR = (deUint8)getBits(src, 56, 58); // 3b
409		deUint8 bG = (deUint8)getBits(src, 51, 55);
410		deUint8 dG = (deUint8)getBits(src, 48, 50);
411		deUint8 bB = (deUint8)getBits(src, 43, 47);
412		deUint8 dB = (deUint8)getBits(src, 40, 42);
413
414		baseR[0] = extend5To8(bR);
415		baseG[0] = extend5To8(bG);
416		baseB[0] = extend5To8(bB);
417
418		baseR[1] = extend5Delta3To8(bR, dR);
419		baseG[1] = extend5Delta3To8(bG, dG);
420		baseB[1] = extend5Delta3To8(bB, dB);
421	}
422
423	static const int modifierTable[8][4] =
424	{
425	//	  00   01   10    11
426		{  2,   8,  -2,   -8 },
427		{  5,  17,  -5,  -17 },
428		{  9,  29,  -9,  -29 },
429		{ 13,  42, -13,  -42 },
430		{ 18,  60, -18,  -60 },
431		{ 24,  80, -24,  -80 },
432		{ 33, 106, -33, -106 },
433		{ 47, 183, -47, -183 }
434	};
435
436	// Write final pixels.
437	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
438	{
439		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
440		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
441		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
442		const int		subBlock		= ((flipBit ? y : x) >= 2) ? 1 : 0;
443		const deUint32	tableNdx		= table[subBlock];
444		const deUint32	modifierNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
445		const int		modifier		= modifierTable[tableNdx][modifierNdx];
446
447		dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
448		dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
449		dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
450	}
451}
452
453// if alphaMode is true, do PUNCHTHROUGH and store alpha to alphaDst; otherwise do ordinary ETC2 RGB8.
454void decompressETC2Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src, deUint8 alphaDst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], bool alphaMode)
455{
456	enum Etc2Mode
457	{
458		MODE_INDIVIDUAL = 0,
459		MODE_DIFFERENTIAL,
460		MODE_T,
461		MODE_H,
462		MODE_PLANAR,
463
464		MODE_LAST
465	};
466
467	const int		diffOpaqueBit	= (int)getBit(src, 33);
468	const deInt8	selBR			= (deInt8)getBits(src, 59, 63);	// 5 bits.
469	const deInt8	selBG			= (deInt8)getBits(src, 51, 55);
470	const deInt8	selBB			= (deInt8)getBits(src, 43, 47);
471	const deInt8	selDR			= extendSigned3To8((deUint8)getBits(src, 56, 58)); // 3 bits.
472	const deInt8	selDG			= extendSigned3To8((deUint8)getBits(src, 48, 50));
473	const deInt8	selDB			= extendSigned3To8((deUint8)getBits(src, 40, 42));
474	Etc2Mode		mode;
475
476	if (!alphaMode && diffOpaqueBit == 0)
477		mode = MODE_INDIVIDUAL;
478	else if (!de::inRange(selBR + selDR, 0, 31))
479		mode = MODE_T;
480	else if (!de::inRange(selBG + selDG, 0, 31))
481		mode = MODE_H;
482	else if (!de::inRange(selBB + selDB, 0, 31))
483		mode = MODE_PLANAR;
484	else
485		mode = MODE_DIFFERENTIAL;
486
487	if (mode == MODE_INDIVIDUAL || mode == MODE_DIFFERENTIAL)
488	{
489		// Individual and differential modes have some steps in common, handle them here.
490		static const int modifierTable[8][4] =
491		{
492		//	  00   01   10    11
493			{  2,   8,  -2,   -8 },
494			{  5,  17,  -5,  -17 },
495			{  9,  29,  -9,  -29 },
496			{ 13,  42, -13,  -42 },
497			{ 18,  60, -18,  -60 },
498			{ 24,  80, -24,  -80 },
499			{ 33, 106, -33, -106 },
500			{ 47, 183, -47, -183 }
501		};
502
503		const int		flipBit		= (int)getBit(src, 32);
504		const deUint32	table[2]	= { getBits(src, 37, 39), getBits(src, 34, 36) };
505		deUint8			baseR[2];
506		deUint8			baseG[2];
507		deUint8			baseB[2];
508
509		if (mode == MODE_INDIVIDUAL)
510		{
511			// Individual mode, initial values.
512			baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
513			baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
514			baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
515			baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
516			baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
517			baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
518		}
519		else
520		{
521			// Differential mode, initial values.
522			baseR[0] = extend5To8(selBR);
523			baseG[0] = extend5To8(selBG);
524			baseB[0] = extend5To8(selBB);
525
526			baseR[1] = extend5To8((deUint8)(selBR + selDR));
527			baseG[1] = extend5To8((deUint8)(selBG + selDG));
528			baseB[1] = extend5To8((deUint8)(selBB + selDB));
529		}
530
531		// Write final pixels for individual or differential mode.
532		for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
533		{
534			const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
535			const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
536			const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
537			const int		subBlock		= ((flipBit ? y : x) >= 2) ? 1 : 0;
538			const deUint32	tableNdx		= table[subBlock];
539			const deUint32	modifierNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
540			const int		alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
541
542			// If doing PUNCHTHROUGH version (alphaMode), opaque bit may affect colors.
543			if (alphaMode && diffOpaqueBit == 0 && modifierNdx == 2)
544			{
545				dst[dstOffset+0]			= 0;
546				dst[dstOffset+1]			= 0;
547				dst[dstOffset+2]			= 0;
548				alphaDst[alphaDstOffset]	= 0;
549			}
550			else
551			{
552				int modifier;
553
554				// PUNCHTHROUGH version and opaque bit may also affect modifiers.
555				if (alphaMode && diffOpaqueBit == 0 && (modifierNdx == 0 || modifierNdx == 2))
556					modifier = 0;
557				else
558					modifier = modifierTable[tableNdx][modifierNdx];
559
560				dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
561				dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
562				dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
563
564				if (alphaMode)
565					alphaDst[alphaDstOffset] = 255;
566			}
567		}
568	}
569	else if (mode == MODE_T || mode == MODE_H)
570	{
571		// T and H modes have some steps in common, handle them here.
572		static const int distTable[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
573
574		deUint8 paintR[4];
575		deUint8 paintG[4];
576		deUint8 paintB[4];
577
578		if (mode == MODE_T)
579		{
580			// T mode, calculate paint values.
581			const deUint8	R1a			= (deUint8)getBits(src, 59, 60);
582			const deUint8	R1b			= (deUint8)getBits(src, 56, 57);
583			const deUint8	G1			= (deUint8)getBits(src, 52, 55);
584			const deUint8	B1			= (deUint8)getBits(src, 48, 51);
585			const deUint8	R2			= (deUint8)getBits(src, 44, 47);
586			const deUint8	G2			= (deUint8)getBits(src, 40, 43);
587			const deUint8	B2			= (deUint8)getBits(src, 36, 39);
588			const deUint32	distNdx		= (getBits(src, 34, 35) << 1) | getBit(src, 32);
589			const int		dist		= distTable[distNdx];
590
591			paintR[0] = extend4To8((R1a << 2) | R1b);
592			paintG[0] = extend4To8(G1);
593			paintB[0] = extend4To8(B1);
594			paintR[2] = extend4To8(R2);
595			paintG[2] = extend4To8(G2);
596			paintB[2] = extend4To8(B2);
597			paintR[1] = (deUint8)deClamp32((int)paintR[2] + dist, 0, 255);
598			paintG[1] = (deUint8)deClamp32((int)paintG[2] + dist, 0, 255);
599			paintB[1] = (deUint8)deClamp32((int)paintB[2] + dist, 0, 255);
600			paintR[3] = (deUint8)deClamp32((int)paintR[2] - dist, 0, 255);
601			paintG[3] = (deUint8)deClamp32((int)paintG[2] - dist, 0, 255);
602			paintB[3] = (deUint8)deClamp32((int)paintB[2] - dist, 0, 255);
603		}
604		else
605		{
606			// H mode, calculate paint values.
607			const deUint8	R1		= (deUint8)getBits(src, 59, 62);
608			const deUint8	G1a		= (deUint8)getBits(src, 56, 58);
609			const deUint8	G1b		= (deUint8)getBit(src, 52);
610			const deUint8	B1a		= (deUint8)getBit(src, 51);
611			const deUint8	B1b		= (deUint8)getBits(src, 47, 49);
612			const deUint8	R2		= (deUint8)getBits(src, 43, 46);
613			const deUint8	G2		= (deUint8)getBits(src, 39, 42);
614			const deUint8	B2		= (deUint8)getBits(src, 35, 38);
615			deUint8			baseR[2];
616			deUint8			baseG[2];
617			deUint8			baseB[2];
618			deUint32		baseValue[2];
619			deUint32		distNdx;
620			int				dist;
621
622			baseR[0]		= extend4To8(R1);
623			baseG[0]		= extend4To8((G1a << 1) | G1b);
624			baseB[0]		= extend4To8((B1a << 3) | B1b);
625			baseR[1]		= extend4To8(R2);
626			baseG[1]		= extend4To8(G2);
627			baseB[1]		= extend4To8(B2);
628			baseValue[0]	= (((deUint32)baseR[0]) << 16) | (((deUint32)baseG[0]) << 8) | baseB[0];
629			baseValue[1]	= (((deUint32)baseR[1]) << 16) | (((deUint32)baseG[1]) << 8) | baseB[1];
630			distNdx			= (getBit(src, 34) << 2) | (getBit(src, 32) << 1) | (deUint32)(baseValue[0] >= baseValue[1]);
631			dist			= distTable[distNdx];
632
633			paintR[0]		= (deUint8)deClamp32((int)baseR[0] + dist, 0, 255);
634			paintG[0]		= (deUint8)deClamp32((int)baseG[0] + dist, 0, 255);
635			paintB[0]		= (deUint8)deClamp32((int)baseB[0] + dist, 0, 255);
636			paintR[1]		= (deUint8)deClamp32((int)baseR[0] - dist, 0, 255);
637			paintG[1]		= (deUint8)deClamp32((int)baseG[0] - dist, 0, 255);
638			paintB[1]		= (deUint8)deClamp32((int)baseB[0] - dist, 0, 255);
639			paintR[2]		= (deUint8)deClamp32((int)baseR[1] + dist, 0, 255);
640			paintG[2]		= (deUint8)deClamp32((int)baseG[1] + dist, 0, 255);
641			paintB[2]		= (deUint8)deClamp32((int)baseB[1] + dist, 0, 255);
642			paintR[3]		= (deUint8)deClamp32((int)baseR[1] - dist, 0, 255);
643			paintG[3]		= (deUint8)deClamp32((int)baseG[1] - dist, 0, 255);
644			paintB[3]		= (deUint8)deClamp32((int)baseB[1] - dist, 0, 255);
645		}
646
647		// Write final pixels for T or H mode.
648		for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
649		{
650			const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
651			const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
652			const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
653			const deUint32	paintNdx		= (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
654			const int		alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
655
656			if (alphaMode && diffOpaqueBit == 0 && paintNdx == 2)
657			{
658				dst[dstOffset+0]			= 0;
659				dst[dstOffset+1]			= 0;
660				dst[dstOffset+2]			= 0;
661				alphaDst[alphaDstOffset]	= 0;
662			}
663			else
664			{
665				dst[dstOffset+0] = (deUint8)deClamp32((int)paintR[paintNdx], 0, 255);
666				dst[dstOffset+1] = (deUint8)deClamp32((int)paintG[paintNdx], 0, 255);
667				dst[dstOffset+2] = (deUint8)deClamp32((int)paintB[paintNdx], 0, 255);
668
669				if (alphaMode)
670					alphaDst[alphaDstOffset] = 255;
671			}
672		}
673	}
674	else
675	{
676		// Planar mode.
677		const deUint8 GO1	= (deUint8)getBit(src, 56);
678		const deUint8 GO2	= (deUint8)getBits(src, 49, 54);
679		const deUint8 BO1	= (deUint8)getBit(src, 48);
680		const deUint8 BO2	= (deUint8)getBits(src, 43, 44);
681		const deUint8 BO3	= (deUint8)getBits(src, 39, 41);
682		const deUint8 RH1	= (deUint8)getBits(src, 34, 38);
683		const deUint8 RH2	= (deUint8)getBit(src, 32);
684		const deUint8 RO	= extend6To8((deUint8)getBits(src, 57, 62));
685		const deUint8 GO	= extend7To8((GO1 << 6) | GO2);
686		const deUint8 BO	= extend6To8((BO1 << 5) | (BO2 << 3) | BO3);
687		const deUint8 RH	= extend6To8((RH1 << 1) | RH2);
688		const deUint8 GH	= extend7To8((deUint8)getBits(src, 25, 31));
689		const deUint8 BH	= extend6To8((deUint8)getBits(src, 19, 24));
690		const deUint8 RV	= extend6To8((deUint8)getBits(src, 13, 18));
691		const deUint8 GV	= extend7To8((deUint8)getBits(src, 6, 12));
692		const deUint8 BV	= extend6To8((deUint8)getBits(src, 0, 5));
693
694		// Write final pixels for planar mode.
695		for (int y = 0; y < 4; y++)
696		{
697			for (int x = 0; x < 4; x++)
698			{
699				const int dstOffset			= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
700				const int unclampedR		= (x * ((int)RH-(int)RO) + y * ((int)RV-(int)RO) + 4*(int)RO + 2) >> 2;
701				const int unclampedG		= (x * ((int)GH-(int)GO) + y * ((int)GV-(int)GO) + 4*(int)GO + 2) >> 2;
702				const int unclampedB		= (x * ((int)BH-(int)BO) + y * ((int)BV-(int)BO) + 4*(int)BO + 2) >> 2;
703				const int alphaDstOffset	= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
704
705				dst[dstOffset+0] = (deUint8)deClamp32(unclampedR, 0, 255);
706				dst[dstOffset+1] = (deUint8)deClamp32(unclampedG, 0, 255);
707				dst[dstOffset+2] = (deUint8)deClamp32(unclampedB, 0, 255);
708
709				if (alphaMode)
710					alphaDst[alphaDstOffset] = 255;
711			}
712		}
713	}
714}
715
716void decompressEAC8Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], deUint64 src)
717{
718	static const int modifierTable[16][8] =
719	{
720		{-3,  -6,  -9, -15,  2,  5,  8, 14},
721		{-3,  -7, -10, -13,  2,  6,  9, 12},
722		{-2,  -5,  -8, -13,  1,  4,  7, 12},
723		{-2,  -4,  -6, -13,  1,  3,  5, 12},
724		{-3,  -6,  -8, -12,  2,  5,  7, 11},
725		{-3,  -7,  -9, -11,  2,  6,  8, 10},
726		{-4,  -7,  -8, -11,  3,  6,  7, 10},
727		{-3,  -5,  -8, -11,  2,  4,  7, 10},
728		{-2,  -6,  -8, -10,  1,  5,  7,  9},
729		{-2,  -5,  -8, -10,  1,  4,  7,  9},
730		{-2,  -4,  -8, -10,  1,  3,  7,  9},
731		{-2,  -5,  -7, -10,  1,  4,  6,  9},
732		{-3,  -4,  -7, -10,  2,  3,  6,  9},
733		{-1,  -2,  -3, -10,  0,  1,  2,  9},
734		{-4,  -6,  -8,  -9,  3,  5,  7,  8},
735		{-3,  -5,  -7,  -9,  2,  4,  6,  8}
736	};
737
738	const deUint8	baseCodeword	= (deUint8)getBits(src, 56, 63);
739	const deUint8	multiplier		= (deUint8)getBits(src, 52, 55);
740	const deUint32	tableNdx		= getBits(src, 48, 51);
741
742	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
743	{
744		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
745		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
746		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8;
747		const int		pixelBitNdx		= 45 - 3*pixelNdx;
748		const deUint32	modifierNdx		= (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
749		const int		modifier		= modifierTable[tableNdx][modifierNdx];
750
751		dst[dstOffset] = (deUint8)deClamp32((int)baseCodeword + (int)multiplier*modifier, 0, 255);
752	}
753}
754
755void decompressEAC11Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11], deUint64 src, bool signedMode)
756{
757	static const int modifierTable[16][8] =
758	{
759		{-3,  -6,  -9, -15,  2,  5,  8, 14},
760		{-3,  -7, -10, -13,  2,  6,  9, 12},
761		{-2,  -5,  -8, -13,  1,  4,  7, 12},
762		{-2,  -4,  -6, -13,  1,  3,  5, 12},
763		{-3,  -6,  -8, -12,  2,  5,  7, 11},
764		{-3,  -7,  -9, -11,  2,  6,  8, 10},
765		{-4,  -7,  -8, -11,  3,  6,  7, 10},
766		{-3,  -5,  -8, -11,  2,  4,  7, 10},
767		{-2,  -6,  -8, -10,  1,  5,  7,  9},
768		{-2,  -5,  -8, -10,  1,  4,  7,  9},
769		{-2,  -4,  -8, -10,  1,  3,  7,  9},
770		{-2,  -5,  -7, -10,  1,  4,  6,  9},
771		{-3,  -4,  -7, -10,  2,  3,  6,  9},
772		{-1,  -2,  -3, -10,  0,  1,  2,  9},
773		{-4,  -6,  -8,  -9,  3,  5,  7,  8},
774		{-3,  -5,  -7,  -9,  2,  4,  6,  8}
775	};
776
777	const deInt32 multiplier	= (deInt32)getBits(src, 52, 55);
778	const deInt32 tableNdx		= (deInt32)getBits(src, 48, 51);
779	deInt32 baseCodeword		= (deInt32)getBits(src, 56, 63);
780
781	if (signedMode)
782	{
783		if (baseCodeword > 127)
784			baseCodeword -= 256;
785		if (baseCodeword == -128)
786			baseCodeword = -127;
787	}
788
789	for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
790	{
791		const int		x				= pixelNdx / ETC2_BLOCK_HEIGHT;
792		const int		y				= pixelNdx % ETC2_BLOCK_HEIGHT;
793		const int		dstOffset		= (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
794		const int		pixelBitNdx		= 45 - 3*pixelNdx;
795		const deUint32	modifierNdx		= (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
796		const int		modifier		= modifierTable[tableNdx][modifierNdx];
797
798		if (signedMode)
799		{
800			deInt16 value;
801
802			if (multiplier != 0)
803				value = (deInt16)deClamp32(baseCodeword*8 + multiplier*modifier*8, -1023, 1023);
804			else
805				value = (deInt16)deClamp32(baseCodeword*8 + modifier, -1023, 1023);
806
807			*((deInt16*)(dst + dstOffset)) = value;
808		}
809		else
810		{
811			deUint16 value;
812
813			if (multiplier != 0)
814				value = (deUint16)deClamp32(baseCodeword*8 + 4 + multiplier*modifier*8, 0, 2047);
815			else
816				value= (deUint16)deClamp32(baseCodeword*8 + 4 + modifier, 0, 2047);
817
818			*((deUint16*)(dst + dstOffset)) = value;
819		}
820	}
821}
822
823} // EtcDecompressInternal
824
825void decompressETC1 (const PixelBufferAccess& dst, const deUint8* src)
826{
827	using namespace EtcDecompressInternal;
828
829	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
830	const deUint64	compressedBlock = get64BitBlock(src, 0);
831
832	decompressETC1Block(dstPtr, compressedBlock);
833}
834
835void decompressETC2 (const PixelBufferAccess& dst, const deUint8* src)
836{
837	using namespace EtcDecompressInternal;
838
839	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
840	const deUint64	compressedBlock = get64BitBlock(src, 0);
841
842	decompressETC2Block(dstPtr, compressedBlock, NULL, false);
843}
844
845void decompressETC2_EAC_RGBA8 (const PixelBufferAccess& dst, const deUint8* src)
846{
847	using namespace EtcDecompressInternal;
848
849	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
850	const int		dstRowPitch		= dst.getRowPitch();
851	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
852
853	const deUint64	compressedBlockAlpha	= get128BitBlockStart(src, 0);
854	const deUint64	compressedBlockRGB		= get128BitBlockEnd(src, 0);
855	deUint8			uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
856	deUint8			uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
857
858	// Decompress.
859	decompressETC2Block(uncompressedBlockRGB, compressedBlockRGB, NULL, false);
860	decompressEAC8Block(uncompressedBlockAlpha, compressedBlockAlpha);
861
862	// Write to dst.
863	for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
864	{
865		for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
866		{
867			const deUint8* const	srcPixelRGB		= &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
868			const deUint8* const	srcPixelAlpha	= &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
869			deUint8* const			dstPixel		= dstPtr + y*dstRowPitch + x*dstPixelSize;
870
871			DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
872			dstPixel[0] = srcPixelRGB[0];
873			dstPixel[1] = srcPixelRGB[1];
874			dstPixel[2] = srcPixelRGB[2];
875			dstPixel[3] = srcPixelAlpha[0];
876		}
877	}
878}
879
880void decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1 (const PixelBufferAccess& dst, const deUint8* src)
881{
882	using namespace EtcDecompressInternal;
883
884	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
885	const int		dstRowPitch		= dst.getRowPitch();
886	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
887
888	const deUint64	compressedBlockRGBA	= get64BitBlock(src, 0);
889	deUint8			uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
890	deUint8			uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
891
892	// Decompress.
893	decompressETC2Block(uncompressedBlockRGB, compressedBlockRGBA, uncompressedBlockAlpha, DE_TRUE);
894
895	// Write to dst.
896	for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
897	{
898		for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
899		{
900			const deUint8* const	srcPixel		= &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
901			const deUint8* const	srcPixelAlpha	= &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
902			deUint8* const			dstPixel		= dstPtr + y*dstRowPitch + x*dstPixelSize;
903
904			DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
905			dstPixel[0] = srcPixel[0];
906			dstPixel[1] = srcPixel[1];
907			dstPixel[2] = srcPixel[2];
908			dstPixel[3] = srcPixelAlpha[0];
909		}
910	}
911}
912
913void decompressEAC_R11 (const PixelBufferAccess& dst, const deUint8* src, bool signedMode)
914{
915	using namespace EtcDecompressInternal;
916
917	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
918	const int		dstRowPitch		= dst.getRowPitch();
919	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
920
921	const deUint64	compressedBlock = get64BitBlock(src, 0);
922	deUint8			uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
923
924	// Decompress.
925	decompressEAC11Block(uncompressedBlock, compressedBlock, signedMode);
926
927	// Write to dst.
928	for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
929	{
930		for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
931		{
932			DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_R11 == 2);
933
934			if (signedMode)
935			{
936				const deInt16* const	srcPixel = (deInt16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
937				deInt16* const			dstPixel = (deInt16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
938
939				dstPixel[0] = extend11To16WithSign(srcPixel[0]);
940			}
941			else
942			{
943				const deUint16* const	srcPixel = (deUint16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
944				deUint16* const			dstPixel = (deUint16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
945
946				dstPixel[0] = extend11To16(srcPixel[0]);
947			}
948		}
949	}
950}
951
952void decompressEAC_RG11 (const PixelBufferAccess& dst, const deUint8* src, bool signedMode)
953{
954	using namespace EtcDecompressInternal;
955
956	deUint8* const	dstPtr			= (deUint8*)dst.getDataPtr();
957	const int		dstRowPitch		= dst.getRowPitch();
958	const int		dstPixelSize	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11;
959
960	const deUint64	compressedBlockR = get128BitBlockStart(src, 0);
961	const deUint64	compressedBlockG = get128BitBlockEnd(src, 0);
962	deUint8			uncompressedBlockR[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
963	deUint8			uncompressedBlockG[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
964
965	// Decompress.
966	decompressEAC11Block(uncompressedBlockR, compressedBlockR, signedMode);
967	decompressEAC11Block(uncompressedBlockG, compressedBlockG, signedMode);
968
969	// Write to dst.
970	for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
971	{
972		for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
973		{
974			DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11 == 4);
975
976			if (signedMode)
977			{
978				const deInt16* const	srcPixelR	= (deInt16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
979				const deInt16* const	srcPixelG	= (deInt16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
980				deInt16* const			dstPixel	= (deInt16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
981
982				dstPixel[0] = extend11To16WithSign(srcPixelR[0]);
983				dstPixel[1] = extend11To16WithSign(srcPixelG[0]);
984			}
985			else
986			{
987				const deUint16* const	srcPixelR	= (deUint16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
988				const deUint16* const	srcPixelG	= (deUint16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
989				deUint16* const			dstPixel	= (deUint16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
990
991				dstPixel[0] = extend11To16(srcPixelR[0]);
992				dstPixel[1] = extend11To16(srcPixelG[0]);
993			}
994		}
995	}
996}
997
998namespace ASTCDecompressInternal
999{
1000
// Upper bounds for ASTC block dimensions (going by the constant names; presumably in texels).
enum
{
	ASTC_MAX_BLOCK_WIDTH	= 12,
	ASTC_MAX_BLOCK_HEIGHT	= 12
};
1006
1007inline deUint32 getBit (deUint32 src, int ndx)
1008{
1009	DE_ASSERT(de::inBounds(ndx, 0, 32));
1010	return (src >> ndx) & 1;
1011}
1012
1013inline deUint32 getBits (deUint32 src, int low, int high)
1014{
1015	const int numBits = (high-low) + 1;
1016	DE_ASSERT(de::inRange(numBits, 1, 32));
1017	return (src >> low) & ((1u<<numBits)-1);
1018}
1019
1020inline bool isBitSet (deUint32 src, int ndx)
1021{
1022	return getBit(src, ndx) != 0;
1023}
1024
1025inline deUint32 reverseBits (deUint32 src, int numBits)
1026{
1027	DE_ASSERT(de::inRange(numBits, 0, 32));
1028	deUint32 result = 0;
1029	for (int i = 0; i < numBits; i++)
1030		result |= ((src >> i) & 1) << (numBits-1-i);
1031	return result;
1032}
1033
1034inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits)
1035{
1036	DE_ASSERT(numSrcBits <= numDstBits);
1037	DE_ASSERT((src & ((1<<numSrcBits)-1)) == src);
1038	deUint32 dst = 0;
1039	for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
1040		dst |= shift >= 0 ? src << shift : src >> -shift;
1041	return dst;
1042}
1043
1044inline deInt32 signExtend (deInt32 src, int numSrcBits)
1045{
1046	DE_ASSERT(de::inRange(numSrcBits, 2, 31));
1047	const bool negative = (src & (1 << (numSrcBits-1))) != 0;
1048	return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
1049}
1050
1051inline bool isFloat16InfOrNan (deFloat16 v)
1052{
1053	return getBits(v, 10, 14) == 31;
1054}
1055
1056// A helper for getting bits from a 128-bit block.
1057class Block128
1058{
1059private:
1060	typedef deUint64 Word;
1061
1062	enum
1063	{
1064		WORD_BYTES	= sizeof(Word),
1065		WORD_BITS	= 8*WORD_BYTES,
1066		NUM_WORDS	= 128 / WORD_BITS
1067	};
1068
1069	DE_STATIC_ASSERT(128 % WORD_BITS == 0);
1070
1071public:
1072	Block128 (const deUint8* src)
1073	{
1074		for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1075		{
1076			m_words[wordNdx] = 0;
1077			for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
1078				m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx);
1079		}
1080	}
1081
1082	deUint32 getBit (int ndx) const
1083	{
1084		DE_ASSERT(de::inBounds(ndx, 0, 128));
1085		return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
1086	}
1087
1088	deUint32 getBits (int low, int high) const
1089	{
1090		DE_ASSERT(de::inBounds(low, 0, 128));
1091		DE_ASSERT(de::inBounds(high, 0, 128));
1092		DE_ASSERT(de::inRange(high-low+1, 0, 32));
1093
1094		if (high-low+1 == 0)
1095			return 0;
1096
1097		const int word0Ndx = low / WORD_BITS;
1098		const int word1Ndx = high / WORD_BITS;
1099
1100		// \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
1101
1102		if (word0Ndx == word1Ndx)
1103			return (m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS);
1104		else
1105		{
1106			DE_ASSERT(word1Ndx == word0Ndx + 1);
1107
1108			return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) |
1109				   (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS));
1110		}
1111	}
1112
1113	bool isBitSet (int ndx) const
1114	{
1115		DE_ASSERT(de::inBounds(ndx, 0, 128));
1116		return getBit(ndx) != 0;
1117	}
1118
1119private:
1120	Word m_words[NUM_WORDS];
1121};
1122
1123// A helper for sequential access into a Block128.
1124class BitAccessStream
1125{
1126public:
1127	BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward)
1128		: m_src				(src)
1129		, m_startNdxInSrc	(startNdxInSrc)
1130		, m_length			(length)
1131		, m_forward			(forward)
1132		, m_ndx				(0)
1133	{
1134	}
1135
1136	// Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
1137	deUint32 getNext (int num)
1138	{
1139		if (num == 0 || m_ndx >= m_length)
1140			return 0;
1141
1142		const int end				= m_ndx + num;
1143		const int numBitsFromSrc	= de::max(0, de::min(m_length, end) - m_ndx);
1144		const int low				= m_ndx;
1145		const int high				= m_ndx + numBitsFromSrc - 1;
1146
1147		m_ndx += num;
1148
1149		return m_forward ?			   m_src.getBits(m_startNdxInSrc + low,  m_startNdxInSrc + high)
1150						 : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
1151	}
1152
1153private:
1154	const Block128&		m_src;
1155	const int			m_startNdxInSrc;
1156	const int			m_length;
1157	const bool			m_forward;
1158
1159	int					m_ndx;
1160};
1161
// Encoding variant of an integer sequence: each value carries either a trit,
// a quint, or nothing in addition to its plain bits.
enum ISEMode
{
	ISEMODE_TRIT = 0,
	ISEMODE_QUINT,
	ISEMODE_PLAIN_BIT,

	ISEMODE_LAST	// Not a real mode; number of modes / "unset" marker.
};
1170
// Parameters of an integer sequence encoding: the mode plus the number of
// plain bits stored for each value.
struct ISEParams
{
	ISEMode		mode;
	int			numBits;	//!< Plain bits per value (on top of the trit/quint, if any).

	ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
};
1178
1179inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues)
1180{
1181	switch (iseParams.mode)
1182	{
1183		case ISEMODE_TRIT:			return divRoundUp(numValues*8, 5) + numValues*iseParams.numBits;
1184		case ISEMODE_QUINT:			return divRoundUp(numValues*7, 3) + numValues*iseParams.numBits;
1185		case ISEMODE_PLAIN_BIT:		return numValues*iseParams.numBits;
1186		default:
1187			DE_ASSERT(false);
1188			return -1;
1189	}
1190}
1191
// One decoded value of an integer sequence.
struct ISEDecodedResult
{
	deUint32 m;		//!< Plain bits of the value, as read from the stream.
	deUint32 tq; //!< Trit or quint value, depending on ISE mode.
	deUint32 v;		//!< Combined value; the trit/quint decoders store (tq << numBits) + m, the plain-bit decoder stores m.
};
1198
// Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
struct ASTCBlockMode
{
	bool		isError;
	// \note Following fields only relevant if !isError.
	bool		isVoidExtent;
	// \note Following fields only relevant if !isVoidExtent.
	bool		isDualPlane;
	int			weightGridWidth;
	int			weightGridHeight;
	ISEParams	weightISEParams;

	// Default state is an error block with placeholder (-1 / ISEMODE_LAST) values
	// in the remaining fields.
	ASTCBlockMode (void)
		: isError			(true)
		, isVoidExtent		(true)
		, isDualPlane		(true)
		, weightGridWidth	(-1)
		, weightGridHeight	(-1)
		, weightISEParams	(ISEMODE_LAST, -1)
	{
	}
};
1221
1222inline int computeNumWeights (const ASTCBlockMode& mode)
1223{
1224	return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
1225}
1226
// A pair of color endpoints, each held as a 4-component unsigned vector.
struct ColorEndpointPair
{
	UVec4 e0;
	UVec4 e1;
};
1232
// Two weights belonging to one texel.
// NOTE(review): presumably one weight per plane (second used only for dual-plane blocks) - confirm against users.
struct TexelWeightPair
{
	deUint32 w[2];
};
1237
// Decode the block mode field of an ASTC block.
//
// blockModeData holds the block mode bits in its low bits (bits 0..10 are read).
// The result has isError == true for the reserved encodings detected below.
// For void-extent blocks only isVoidExtent is filled in (the other fields keep
// their default placeholder values). Otherwise the weight grid dimensions,
// the dual-plane flag and the weight ISE parameters are decoded.
ASTCBlockMode getASTCBlockMode (deUint32 blockModeData)
{
	ASTCBlockMode blockMode;
	blockMode.isError = true; // \note Set to false later, if not error.

	// Void-extent blocks are identified by a fixed pattern in the low 9 bits.
	blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;

	if (!blockMode.isVoidExtent)
	{
		if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
			return blockMode; // Invalid ("reserved").

		// r is the 3-bit weight range selector; where its bits are located
		// depends on which of the two layouts (bits 0..1 zero or not) is in use.
		deUint32 r = (deUint32)-1; // \note Set in the following branches.

		if (getBits(blockModeData, 0, 1) == 0)
		{
			// Layout with bits 0..1 == 0: range bits at 4, 2, 3; grid size selected by bits 7..8.
			const deUint32 r0	= getBit(blockModeData, 4);
			const deUint32 r1	= getBit(blockModeData, 2);
			const deUint32 r2	= getBit(blockModeData, 3);
			const deUint32 i78	= getBits(blockModeData, 7, 8);

			r = (r2 << 2) | (r1 << 1) | (r0 << 0);

			if (i78 == 3)
			{
				// Fixed 6x10 or 10x6 grid, chosen by bit 5.
				const bool i5 = isBitSet(blockModeData, 5);
				blockMode.weightGridWidth	= i5 ? 10 : 6;
				blockMode.weightGridHeight	= i5 ? 6  : 10;
			}
			else
			{
				const deUint32 a = getBits(blockModeData, 5, 6);
				switch (i78)
				{
					case 0:		blockMode.weightGridWidth = 12;		blockMode.weightGridHeight = a + 2;									break;
					case 1:		blockMode.weightGridWidth = a + 2;	blockMode.weightGridHeight = 12;									break;
					case 2:		blockMode.weightGridWidth = a + 6;	blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;		break;
					default: DE_ASSERT(false);
				}
			}
		}
		else
		{
			// Layout with bits 0..1 != 0: range bits at 4, 0, 1; grid size selected by bits 2..3.
			const deUint32 r0	= getBit(blockModeData, 4);
			const deUint32 r1	= getBit(blockModeData, 0);
			const deUint32 r2	= getBit(blockModeData, 1);
			const deUint32 i23	= getBits(blockModeData, 2, 3);
			const deUint32 a	= getBits(blockModeData, 5, 6);

			r = (r2 << 2) | (r1 << 1) | (r0 << 0);

			if (i23 == 3)
			{
				// Here b is a single bit (bit 7) and bit 8 selects between two size formulas.
				const deUint32	b	= getBit(blockModeData, 7);
				const bool		i8	= isBitSet(blockModeData, 8);
				blockMode.weightGridWidth	= i8 ? b+2 : a+2;
				blockMode.weightGridHeight	= i8 ? a+2 : b+6;
			}
			else
			{
				const deUint32 b = getBits(blockModeData, 7, 8);

				switch (i23)
				{
					case 0:		blockMode.weightGridWidth = b + 4;	blockMode.weightGridHeight = a + 2;	break;
					case 1:		blockMode.weightGridWidth = b + 8;	blockMode.weightGridHeight = a + 2;	break;
					case 2:		blockMode.weightGridWidth = a + 2;	blockMode.weightGridHeight = b + 8;	break;
					default: DE_ASSERT(false);
				}
			}
		}

		// In the (bits 0..1 == 0, bits 7..8 == 2) encoding, bits 9..10 were used for the
		// grid height above, so the precision bit (h) and dual-plane bit are forced to zero.
		const bool	zeroDH		= getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
		const bool	h			= zeroDH ? 0 : isBitSet(blockModeData, 9);
		blockMode.isDualPlane	= zeroDH ? 0 : isBitSet(blockModeData, 10);

		{
			// Map (h, r) to the weight ISE mode and plain bit count.
			// Defaults (plain bits, 0 bits) are overridden per case; r values 0 and 1 are not expected here.
			ISEMode&	m	= blockMode.weightISEParams.mode;
			int&		b	= blockMode.weightISEParams.numBits;
			m = ISEMODE_PLAIN_BIT;
			b = 0;

			if (h)
			{
				switch (r)
				{
					case 2:							m = ISEMODE_QUINT;	b = 1;	break;
					case 3:		m = ISEMODE_TRIT;						b = 2;	break;
					case 4:												b = 4;	break;
					case 5:							m = ISEMODE_QUINT;	b = 2;	break;
					case 6:		m = ISEMODE_TRIT;						b = 3;	break;
					case 7:												b = 5;	break;
					default: DE_ASSERT(false);
				}
			}
			else
			{
				switch (r)
				{
					case 2: 											b = 1;	break;
					case 3: 	m = ISEMODE_TRIT;								break;
					case 4: 											b = 2;	break;
					case 5: 						m = ISEMODE_QUINT;			break;
					case 6: 	m = ISEMODE_TRIT;						b = 1;	break;
					case 7: 											b = 3;	break;
					default: DE_ASSERT(false);
				}
			}
		}
	}

	blockMode.isError = false;
	return blockMode;
}
1352
1353inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB)
1354{
1355	if (isSRGB)
1356	{
1357		deUint8* const dstU = (deUint8*)dst;
1358
1359		for (int i = 0; i < blockWidth*blockHeight; i++)
1360		{
1361			dstU[4*i + 0] = 0xff;
1362			dstU[4*i + 1] = 0;
1363			dstU[4*i + 2] = 0xff;
1364			dstU[4*i + 3] = 0xff;
1365		}
1366	}
1367	else
1368	{
1369		float* const dstF = (float*)dst;
1370
1371		for (int i = 0; i < blockWidth*blockHeight; i++)
1372		{
1373			dstF[4*i + 0] = 1.0f;
1374			dstF[4*i + 1] = 0.0f;
1375			dstF[4*i + 2] = 1.0f;
1376			dstF[4*i + 3] = 1.0f;
1377		}
1378	}
1379}
1380
1381void decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode)
1382{
1383	const deUint32	minSExtent			= blockData.getBits(12, 24);
1384	const deUint32	maxSExtent			= blockData.getBits(25, 37);
1385	const deUint32	minTExtent			= blockData.getBits(38, 50);
1386	const deUint32	maxTExtent			= blockData.getBits(51, 63);
1387	const bool		allExtentsAllOnes	= minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
1388	const bool		isHDRBlock			= blockData.isBitSet(9);
1389
1390	if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
1391	{
1392		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1393		return;
1394	}
1395
1396	const deUint32 rgba[4] =
1397	{
1398		blockData.getBits(64,  79),
1399		blockData.getBits(80,  95),
1400		blockData.getBits(96,  111),
1401		blockData.getBits(112, 127)
1402	};
1403
1404	if (isSRGB)
1405	{
1406		deUint8* const dstU = (deUint8*)dst;
1407		for (int i = 0; i < blockWidth*blockHeight; i++)
1408		for (int c = 0; c < 4; c++)
1409			dstU[i*4 + c] = (rgba[c] & 0xff00) >> 8;
1410	}
1411	else
1412	{
1413		float* const dstF = (float*)dst;
1414
1415		if (isHDRBlock)
1416		{
1417			for (int c = 0; c < 4; c++)
1418			{
1419				if (isFloat16InfOrNan(rgba[c]))
1420					throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)");
1421			}
1422
1423			for (int i = 0; i < blockWidth*blockHeight; i++)
1424			for (int c = 0; c < 4; c++)
1425				dstF[i*4 + c] = deFloat16To32((deFloat16)rgba[c]);
1426		}
1427		else
1428		{
1429			for (int i = 0; i < blockWidth*blockHeight; i++)
1430			for (int c = 0; c < 4; c++)
1431				dstF[i*4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
1432		}
1433	}
1434
1435	return;
1436}
1437
1438void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart)
1439{
1440	if (numPartitions == 1)
1441		endpointModesDst[0] = blockData.getBits(13, 16);
1442	else
1443	{
1444		const deUint32 highLevelSelector = blockData.getBits(23, 24);
1445
1446		if (highLevelSelector == 0)
1447		{
1448			const deUint32 mode = blockData.getBits(25, 28);
1449			for (int i = 0; i < numPartitions; i++)
1450				endpointModesDst[i] = mode;
1451		}
1452		else
1453		{
1454			for (int partNdx = 0; partNdx < numPartitions; partNdx++)
1455			{
1456				const deUint32 cemClass		= highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
1457				const deUint32 lowBit0Ndx	= numPartitions + 2*partNdx;
1458				const deUint32 lowBit1Ndx	= numPartitions + 2*partNdx + 1;
1459				const deUint32 lowBit0		= blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4);
1460				const deUint32 lowBit1		= blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4);
1461
1462				endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
1463			}
1464		}
1465	}
1466}
1467
1468inline int computeNumColorEndpointValues (deUint32 endpointMode)
1469{
1470	DE_ASSERT(endpointMode < 16);
1471	return (endpointMode/4 + 1) * 2;
1472}
1473
1474int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions)
1475{
1476	int result = 0;
1477	for (int i = 0; i < numPartitions; i++)
1478		result += computeNumColorEndpointValues(endpointModes[i]);
1479	return result;
1480}
1481
// Decode one trit-mode ISE block holding 1..5 values into dst[0..numValues-1].
//
// The stream interleaves the numBits plain bits of each value with pieces of an
// 8-bit packed trit field T (2+2+1+2+1 bits). All five slots are always read from
// the stream; the T pieces that belong to values beyond numValues are then zeroed.
// tritsFromT maps T to the five trits. For each value, v = (trit << numBits) + m.
void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
{
	DE_ASSERT(de::inRange(numValues, 1, 5));

	deUint32 m[5];

	// \note The read order below defines the bit layout and must not be changed.
	m[0]			= data.getNext(numBits);
	deUint32 T01	= data.getNext(2);
	m[1]			= data.getNext(numBits);
	deUint32 T23	= data.getNext(2);
	m[2]			= data.getNext(numBits);
	deUint32 T4		= data.getNext(1);
	m[3]			= data.getNext(numBits);
	deUint32 T56	= data.getNext(2);
	m[4]			= data.getNext(numBits);
	deUint32 T7		= data.getNext(1);

	// Discard T bits that lie past the last value actually present in this block.
	switch (numValues)
	{
		// \note Fall-throughs.
		case 1: T23		= 0;
		case 2: T4		= 0;
		case 3: T56		= 0;
		case 4: T7		= 0;
		case 5: break;
		default:
			DE_ASSERT(false);
	}

	const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);

	// Lookup from the packed 8-bit T to the five trit values (each 0..2).
	static const deUint32 tritsFromT[256][5] =
	{
		{ 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
		{ 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
		{ 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
		{ 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
		{ 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
		{ 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
		{ 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
		{ 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
		{ 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
		{ 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
		{ 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
		{ 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
		{ 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
		{ 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
		{ 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
		{ 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
	};

	const deUint32 (& trits)[5] = tritsFromT[T];

	for (int i = 0; i < numValues; i++)
	{
		dst[i].m	= m[i];
		dst[i].tq	= trits[i];
		dst[i].v	= (trits[i] << numBits) + m[i];
	}
}
1542
1543void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
1544{
1545	DE_ASSERT(de::inRange(numValues, 1, 3));
1546
1547	deUint32 m[3];
1548
1549	m[0]			= data.getNext(numBits);
1550	deUint32 Q012	= data.getNext(3);
1551	m[1]			= data.getNext(numBits);
1552	deUint32 Q34	= data.getNext(2);
1553	m[2]			= data.getNext(numBits);
1554	deUint32 Q56	= data.getNext(2);
1555
1556	switch (numValues)
1557	{
1558		// \note Fall-throughs.
1559		case 1: Q34		= 0;
1560		case 2: Q56		= 0;
1561		case 3: break;
1562		default:
1563			DE_ASSERT(false);
1564	}
1565
1566	const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
1567
1568	static const deUint32 quintsFromQ[256][3] =
1569	{
1570		{ 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
1571		{ 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
1572		{ 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
1573		{ 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
1574		{ 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
1575		{ 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
1576		{ 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
1577		{ 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
1578	};
1579
1580	const deUint32 (& quints)[3] = quintsFromQ[Q];
1581
1582	for (int i = 0; i < numValues; i++)
1583	{
1584		dst[i].m	= m[i];
1585		dst[i].tq	= quints[i];
1586		dst[i].v	= (quints[i] << numBits) + m[i];
1587	}
1588}
1589
1590inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
1591{
1592	dst[0].m = data.getNext(numBits);
1593	dst[0].v = dst[0].m;
1594}
1595
1596void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
1597{
1598	if (params.mode == ISEMODE_TRIT)
1599	{
1600		const int numBlocks = divRoundUp(numValues, 5);
1601		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
1602		{
1603			const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5;
1604			decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits);
1605		}
1606	}
1607	else if (params.mode == ISEMODE_QUINT)
1608	{
1609		const int numBlocks = divRoundUp(numValues, 3);
1610		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
1611		{
1612			const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3;
1613			decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits);
1614		}
1615	}
1616	else
1617	{
1618		DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
1619		for (int i = 0; i < numValues; i++)
1620			decodeISEBitBlock(&dst[i], data, params.numBits);
1621	}
1622}
1623
1624ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence)
1625{
1626	int curBitsForTritMode		= 6;
1627	int curBitsForQuintMode		= 5;
1628	int curBitsForPlainBitMode	= 8;
1629
1630	while (true)
1631	{
1632		DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
1633
1634		const int tritRange			= curBitsForTritMode > 0		? (3 << curBitsForTritMode) - 1			: -1;
1635		const int quintRange		= curBitsForQuintMode > 0		? (5 << curBitsForQuintMode) - 1		: -1;
1636		const int plainBitRange		= curBitsForPlainBitMode > 0	? (1 << curBitsForPlainBitMode) - 1		: -1;
1637		const int maxRange			= de::max(de::max(tritRange, quintRange), plainBitRange);
1638
1639		if (maxRange == tritRange)
1640		{
1641			const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
1642			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1643				return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
1644			curBitsForTritMode--;
1645		}
1646		else if (maxRange == quintRange)
1647		{
1648			const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
1649			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1650				return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
1651			curBitsForQuintMode--;
1652		}
1653		else
1654		{
1655			const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
1656			DE_ASSERT(maxRange == plainBitRange);
1657			if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1658				return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
1659			curBitsForPlainBitMode--;
1660		}
1661	}
1662}
1663
// Unquantize numEndpoints decoded ISE values into 8-bit color endpoint values in dst.
//
// Plain-bit values are expanded to 8 bits by bit replication. Trit/quint values
// are computed as T = tq*C + B, T ^= A, result = (A & 0x80) | (T >> 2), where
// C is a per-range constant, B is a per-range pattern built from the plain bits
// above the lowest one, and A replicates the lowest plain bit across 9 bits.
void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
{
	if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
	{
		// rangeCase enumerates the supported ranges: trit+1 bit -> 0, quint+1 bit -> 1, trit+2 bits -> 2, ..., trit+6 bits -> 10.
		const int rangeCase				= iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
		DE_ASSERT(de::inRange(rangeCase, 0, 10));
		static const deUint32	Ca[11]	= { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
		const deUint32			C		= Ca[rangeCase];

		for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
		{
			// Individual plain bits of the value, a being the least significant.
			const deUint32 a = getBit(iseResults[endpointNdx].m, 0);
			const deUint32 b = getBit(iseResults[endpointNdx].m, 1);
			const deUint32 c = getBit(iseResults[endpointNdx].m, 2);
			const deUint32 d = getBit(iseResults[endpointNdx].m, 3);
			const deUint32 e = getBit(iseResults[endpointNdx].m, 4);
			const deUint32 f = getBit(iseResults[endpointNdx].m, 5);

			// A is all zeros or nine ones, depending on the lowest plain bit.
			const deUint32 A = a == 0 ? 0 : (1<<9)-1;
			// B is a 9-bit pattern whose layout depends on the range.
			const deUint32 B = rangeCase == 0	? 0
							 : rangeCase == 1	? 0
							 : rangeCase == 2	? (b << 8) |									(b << 4) |				(b << 2) |	(b << 1)
							 : rangeCase == 3	? (b << 8) |												(b << 3) |	(b << 2)
							 : rangeCase == 4	? (c << 8) | (b << 7) |										(c << 3) |	(b << 2) |	(c << 1) |	(b << 0)
							 : rangeCase == 5	? (c << 8) | (b << 7) |													(c << 2) |	(b << 1) |	(c << 0)
							 : rangeCase == 6	? (d << 8) | (c << 7) | (b << 6) |										(d << 2) |	(c << 1) |	(b << 0)
							 : rangeCase == 7	? (d << 8) | (c << 7) | (b << 6) |													(d << 1) |	(c << 0)
							 : rangeCase == 8	? (e << 8) | (d << 7) | (c << 6) | (b << 5) |										(e << 1) |	(d << 0)
							 : rangeCase == 9	? (e << 8) | (d << 7) | (c << 6) | (b << 5) |													(e << 0)
							 : rangeCase == 10	? (f << 8) | (e << 7) | (d << 6) | (c << 5) |	(b << 4) |										(f << 0)
							 : (deUint32)-1;
			DE_ASSERT(B != (deUint32)-1);

			dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80);
		}
	}
	else
	{
		DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);

		for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
			dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
	}
}
1708
1709inline void bitTransferSigned (deInt32& a, deInt32& b)
1710{
1711	b >>= 1;
1712	b |= a & 0x80;
1713	a >>= 1;
1714	a &= 0x3f;
1715	if (isBitSet(a, 5))
1716		a -= 0x40;
1717}
1718
1719inline UVec4 clampedRGBA (const IVec4& rgba)
1720{
1721	return UVec4(de::clamp(rgba.x(), 0, 0xff),
1722				 de::clamp(rgba.y(), 0, 0xff),
1723				 de::clamp(rgba.z(), 0, 0xff),
1724				 de::clamp(rgba.w(), 0, 0xff));
1725}
1726
1727inline IVec4 blueContract (int r, int g, int b, int a)
1728{
1729	return IVec4((r+b)>>1, (g+b)>>1, b, a);
1730}
1731
1732inline bool isColorEndpointModeHDR (deUint32 mode)
1733{
1734	return mode == 2	||
1735		   mode == 3	||
1736		   mode == 7	||
1737		   mode == 11	||
1738		   mode == 14	||
1739		   mode == 15;
1740}
1741
// Decodes color endpoint mode 7 (an HDR mode, see isColorEndpointModeHDR) from four unquantized values.
//
// The low bits of v0..v3 hold red, green, blue and a scale; the high bits hold a major-component
// selector, a sub-mode (0..5) and seven extra bits whose destination depends on the sub-mode.
// e1 receives the decoded color and e0 the same color with scale subtracted from each channel,
// both clamped to [0, 0xfff]. Alpha is 0x780 for both endpoints.
1742void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
1743{
	// Two 2-bit selectors: m10 from bit 7 of v1 and v2, m23 from the top two bits of v0.
1744	const deUint32 m10		= getBit(v1, 7) | (getBit(v2, 7) << 1);
1745	const deUint32 m23		= getBits(v0, 6, 7);
	// m10 == 3 is an escape: the major component then comes from m23, and m23 == 3 escapes again to sub-mode 5.
1746	const deUint32 majComp	= m10 != 3	? m10
1747							: m23 != 3	? m23
1748							:			  0;
1749	const deUint32 mode		= m10 != 3	? m23
1750							: m23 != 3	? 4
1751							:			  5;
1752
	// Low-order bits that are present in every sub-mode.
1753	deInt32			red		= (deInt32)getBits(v0, 0, 5);
1754	deInt32			green	= (deInt32)getBits(v1, 0, 4);
1755	deInt32			blue	= (deInt32)getBits(v2, 0, 4);
1756	deInt32			scale	= (deInt32)getBits(v3, 0, 4);
1757
1758	{
	// SHOR ORs one bit into a value at the given position; ASSIGN_X_BITS routes x0..x6, in order, to the listed (value, bit position) pairs.
1759#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1760#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
1761
		// Extra bits whose meaning depends on the sub-mode.
1762		const deUint32	x0	= getBit(v1, 6);
1763		const deUint32	x1	= getBit(v1, 5);
1764		const deUint32	x2	= getBit(v2, 6);
1765		const deUint32	x3	= getBit(v2, 5);
1766		const deUint32	x4	= getBit(v3, 7);
1767		const deUint32	x5	= getBit(v3, 6);
1768		const deUint32	x6	= getBit(v3, 5);
1769
		// Short aliases to keep the routing table below readable.
1770		deInt32&		R	= red;
1771		deInt32&		G	= green;
1772		deInt32&		B	= blue;
1773		deInt32&		S	= scale;
1774
1775		switch (mode)
1776		{
1777			case 0: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,10,  R,6,  S,6,   S,5); break;
1778			case 1: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  R,10,  R,9); break;
1779			case 2: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,6,   S,7,  S,6,   S,5); break;
1780			case 3: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  S,6,   S,5); break;
1781			case 4: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  R,7,   S,5); break;
1782			case 5: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  S,6,   S,5); break;
1783			default:
1784				DE_ASSERT(false);
1785		}
1786
1787#undef ASSIGN_X_BITS
1788#undef SHOR
1789	}
1790
	// Per-sub-mode left shift applied to all four values.
1791	static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
1792	DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
1793
1794	red		<<= shiftAmounts[mode];
1795	green	<<= shiftAmounts[mode];
1796	blue	<<= shiftAmounts[mode];
1797	scale	<<= shiftAmounts[mode];
1798
	// In all sub-modes except 5, green and blue are stored as differences from red.
1799	if (mode != 5)
1800	{
1801		green	= red - green;
1802		blue	= red - blue;
1803	}
1804
	// The value decoded as "red" belongs to the major component; swap it into place.
1805	if (majComp == 1)
1806		std::swap(red, green);
1807	else if (majComp == 2)
1808		std::swap(red, blue);
1809
1810	e0 = UVec4(de::clamp(red	- scale,	0, 0xfff),
1811			   de::clamp(green	- scale,	0, 0xfff),
1812			   de::clamp(blue	- scale,	0, 0xfff),
1813			   0x780);
1814
1815	e1 = UVec4(de::clamp(red,				0, 0xfff),
1816			   de::clamp(green,				0, 0xfff),
1817			   de::clamp(blue,				0, 0xfff),
1818			   0x780);
1819}
1820
1821void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
1822{
1823	const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
1824
1825	if (major == 3)
1826	{
1827		e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
1828		e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
1829	}
1830	else
1831	{
1832		const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
1833
1834		deInt32 a	= (deInt32)((getBit(v1, 6) << 8) | v0);
1835		deInt32 c	= (deInt32)(getBits(v1, 0, 5));
1836		deInt32 b0	= (deInt32)(getBits(v2, 0, 5));
1837		deInt32 b1	= (deInt32)(getBits(v3, 0, 5));
1838		deInt32 d0	= (deInt32)(getBits(v4, 0, 4));
1839		deInt32 d1	= (deInt32)(getBits(v5, 0, 4));
1840
1841		{
1842#define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1843#define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
1844
1845			const deUint32 x0 = getBit(v2, 6);
1846			const deUint32 x1 = getBit(v3, 6);
1847			const deUint32 x2 = getBit(v4, 6);
1848			const deUint32 x3 = getBit(v5, 6);
1849			const deUint32 x4 = getBit(v4, 5);
1850			const deUint32 x5 = getBit(v5, 5);
1851
1852			switch (mode)
1853			{
1854				case 0: ASSIGN_X_BITS(b0,6,  b1,6,   d0,6,  d1,6,  d0,5,  d1,5); break;
1855				case 1: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  d0,5,  d1,5); break;
1856				case 2: ASSIGN_X_BITS(a,9,   c,6,    d0,6,  d1,6,  d0,5,  d1,5); break;
1857				case 3: ASSIGN_X_BITS(b0,6,  b1,6,   a,9,   c,6,   d0,5,  d1,5); break;
1858				case 4: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  a,9,   a,10); break;
1859				case 5: ASSIGN_X_BITS(a,9,   a,10,   c,7,   c,6,   d0,5,  d1,5); break;
1860				case 6: ASSIGN_X_BITS(b0,6,  b1,6,   a,11,  c,6,   a,9,   a,10); break;
1861				case 7: ASSIGN_X_BITS(a,9,   a,10,   a,11,  c,6,   d0,5,  d1,5); break;
1862				default:
1863					DE_ASSERT(false);
1864			}
1865
1866#undef ASSIGN_X_BITS
1867#undef SHOR
1868		}
1869
1870		static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
1871		DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
1872
1873		d0 = signExtend(d0, numDBits[mode]);
1874		d1 = signExtend(d1, numDBits[mode]);
1875
1876		const int shiftAmount = (mode >> 1) ^ 3;
1877		a	<<= shiftAmount;
1878		c	<<= shiftAmount;
1879		b0	<<= shiftAmount;
1880		b1	<<= shiftAmount;
1881		d0	<<= shiftAmount;
1882		d1	<<= shiftAmount;
1883
1884		e0 = UVec4(de::clamp(a-c,			0, 0xfff),
1885				   de::clamp(a-b0-c-d0,		0, 0xfff),
1886				   de::clamp(a-b1-c-d1,		0, 0xfff),
1887				   0x780);
1888
1889		e1 = UVec4(de::clamp(a,				0, 0xfff),
1890				   de::clamp(a-b0,			0, 0xfff),
1891				   de::clamp(a-b1,			0, 0xfff),
1892				   0x780);
1893
1894		if (major == 1)
1895		{
1896			std::swap(e0.x(), e0.y());
1897			std::swap(e1.x(), e1.y());
1898		}
1899		else if (major == 2)
1900		{
1901			std::swap(e0.x(), e0.z());
1902			std::swap(e1.x(), e1.z());
1903		}
1904	}
1905}
1906
1907void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
1908{
1909	decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
1910
1911	const deUint32	mode	= (getBit(v7In, 7) << 1) | getBit(v6In, 7);
1912	deInt32			v6		= (deInt32)getBits(v6In, 0, 6);
1913	deInt32			v7		= (deInt32)getBits(v7In, 0, 6);
1914
1915	if (mode == 3)
1916	{
1917		e0.w() = v6 << 5;
1918		e1.w() = v7 << 5;
1919	}
1920	else
1921	{
1922		v6 |= (v7 << (mode+1)) & 0x780;
1923		v7 &= (0x3f >> mode);
1924		v7 ^= 0x20 >> mode;
1925		v7 -= 0x20 >> mode;
1926		v6 <<= 4-mode;
1927		v7 <<= 4-mode;
1928
1929		v7 += v6;
1930		v7 = de::clamp(v7, 0, 0xfff);
1931		e0.w() = v6;
1932		e1.w() = v7;
1933	}
1934}
1935
// Converts unquantized endpoint values into one endpoint pair (e0, e1) per partition.
//
// unquantizedEndpoints holds the values of all partitions back to back; each partition consumes
// computeNumColorEndpointValues(mode) of them. endpointModes[i] (0..15) selects how partition i's
// values are interpreted. Modes 2, 3, 7, 11, 14 and 15 are the HDR modes (see isColorEndpointModeHDR).
1936void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
1937{
	// Index of the first unquantized value belonging to the current partition.
1938	int unquantizedNdx = 0;
1939
1940	for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
1941	{
1942		const deUint32		endpointMode	= endpointModes[partitionNdx];
1943		const deUint32*		v				= &unquantizedEndpoints[unquantizedNdx];
1944		UVec4&				e0				= dst[partitionNdx].e0;
1945		UVec4&				e1				= dst[partitionNdx].e1;
1946
1947		unquantizedNdx += computeNumColorEndpointValues(endpointMode);
1948
1949		switch (endpointMode)
1950		{
			// Luminance, direct: v[0] and v[1] replicated to RGB, alpha 0xff.
1951			case 0:
1952				e0 = UVec4(v[0], v[0], v[0], 0xff);
1953				e1 = UVec4(v[1], v[1], v[1], 0xff);
1954				break;
1955
			// Luminance, base + offset: L0 combines the top 6 bits of v[0] with the top 2 bits of v[1];
			// L1 adds the low 6 bits of v[1] and saturates at 0xff.
1956			case 1:
1957			{
1958				const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
1959				const deUint32 L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
1960				e0 = UVec4(L0, L0, L0, 0xff);
1961				e1 = UVec4(L1, L1, L1, 0xff);
1962				break;
1963			}
1964
			// HDR luminance: both values scaled by 16. If v[1] < v[0] the two are swapped and offset by +8 / -8.
1965			case 2:
1966			{
1967				const deUint32 v1Gr		= v[1] >= v[0];
1968				const deUint32 y0		= v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
1969				const deUint32 y1		= v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
1970
1971				e0 = UVec4(y0, y0, y0, 0x780);
1972				e1 = UVec4(y1, y1, y1, 0x780);
1973				break;
1974			}
1975
			// HDR luminance, base + delta: bit 7 of v[0] selects between two bit layouts; y1 saturates at 0xfff.
1976			case 3:
1977			{
1978				const bool		m	= isBitSet(v[0], 7);
1979				const deUint32	y0	= m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
1980										: (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
1981				const deUint32	d	= m ? getBits(v[1], 0, 4) << 2
1982										: getBits(v[1], 0, 3) << 1;
1983				const deUint32	y1	= de::min(0xfffu, y0+d);
1984
1985				e0 = UVec4(y0, y0, y0, 0x780);
1986				e1 = UVec4(y1, y1, y1, 0x780);
1987				break;
1988			}
1989
			// Luminance + alpha, direct.
1990			case 4:
1991				e0 = UVec4(v[0], v[0], v[0], v[2]);
1992				e1 = UVec4(v[1], v[1], v[1], v[3]);
1993				break;
1994
			// Luminance + alpha, base + signed offset (see bitTransferSigned); results clamped to [0, 0xff].
1995			case 5:
1996			{
1997				deInt32 v0 = (deInt32)v[0];
1998				deInt32 v1 = (deInt32)v[1];
1999				deInt32 v2 = (deInt32)v[2];
2000				deInt32 v3 = (deInt32)v[3];
2001				bitTransferSigned(v1, v0);
2002				bitTransferSigned(v3, v2);
2003
2004				e0 = clampedRGBA(IVec4(v0,		v0,		v0,		v2));
2005				e1 = clampedRGBA(IVec4(v0+v1,	v0+v1,	v0+v1,	v2+v3));
2006				break;
2007			}
2008
			// RGB, base + scale: e1 is (v[0], v[1], v[2]); e0 is e1 scaled by v[3]/256. Alpha 0xff.
2009			case 6:
2010				e0 = UVec4((v[0]*v[3]) >> 8,	(v[1]*v[3]) >> 8,	(v[2]*v[3]) >> 8,	0xff);
2011				e1 = UVec4(v[0],				v[1],				v[2],				0xff);
2012				break;
2013
2014			case 7:
2015				decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
2016				break;
2017
			// RGB, direct: even values form e0 and odd values e1. If the odd sum is smaller than the even sum,
			// the endpoints are swapped and blueContract() is applied.
2018			case 8:
2019				if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
2020				{
2021					e0 = UVec4(v[0], v[2], v[4], 0xff);
2022					e1 = UVec4(v[1], v[3], v[5], 0xff);
2023				}
2024				else
2025				{
2026					e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
2027					e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
2028				}
2029				break;
2030
			// RGB, base + signed offset: a negative offset sum swaps the endpoints and applies blueContract().
2031			case 9:
2032			{
2033				deInt32 v0 = (deInt32)v[0];
2034				deInt32 v1 = (deInt32)v[1];
2035				deInt32 v2 = (deInt32)v[2];
2036				deInt32 v3 = (deInt32)v[3];
2037				deInt32 v4 = (deInt32)v[4];
2038				deInt32 v5 = (deInt32)v[5];
2039				bitTransferSigned(v1, v0);
2040				bitTransferSigned(v3, v2);
2041				bitTransferSigned(v5, v4);
2042
2043				if (v1+v3+v5 >= 0)
2044				{
2045					e0 = clampedRGBA(IVec4(v0,		v2,		v4,		0xff));
2046					e1 = clampedRGBA(IVec4(v0+v1,	v2+v3,	v4+v5,	0xff));
2047				}
2048				else
2049				{
2050					e0 = clampedRGBA(blueContract(v0+v1,	v2+v3,	v4+v5,	0xff));
2051					e1 = clampedRGBA(blueContract(v0,		v2,		v4,		0xff));
2052				}
2053				break;
2054			}
2055
			// Same RGB decoding as mode 6, with the two alphas given directly by v[4] and v[5].
2056			case 10:
2057				e0 = UVec4((v[0]*v[3]) >> 8,	(v[1]*v[3]) >> 8,	(v[2]*v[3]) >> 8,	v[4]);
2058				e1 = UVec4(v[0],				v[1],				v[2],				v[5]);
2059				break;
2060
2061			case 11:
2062				decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
2063				break;
2064
			// RGBA, direct: same ordering rule as mode 8, with alpha taken from v[6] and v[7].
2065			case 12:
2066				if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
2067				{
2068					e0 = UVec4(v[0], v[2], v[4], v[6]);
2069					e1 = UVec4(v[1], v[3], v[5], v[7]);
2070				}
2071				else
2072				{
2073					e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
2074					e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
2075				}
2076				break;
2077
			// RGBA, base + signed offset: same rule as mode 9, with alpha from v[6] and v[7].
2078			case 13:
2079			{
2080				deInt32 v0 = (deInt32)v[0];
2081				deInt32 v1 = (deInt32)v[1];
2082				deInt32 v2 = (deInt32)v[2];
2083				deInt32 v3 = (deInt32)v[3];
2084				deInt32 v4 = (deInt32)v[4];
2085				deInt32 v5 = (deInt32)v[5];
2086				deInt32 v6 = (deInt32)v[6];
2087				deInt32 v7 = (deInt32)v[7];
2088				bitTransferSigned(v1, v0);
2089				bitTransferSigned(v3, v2);
2090				bitTransferSigned(v5, v4);
2091				bitTransferSigned(v7, v6);
2092
2093				if (v1+v3+v5 >= 0)
2094				{
2095					e0 = clampedRGBA(IVec4(v0,		v2,		v4,		v6));
2096					e1 = clampedRGBA(IVec4(v0+v1,	v2+v3,	v4+v5,	v6+v7));
2097				}
2098				else
2099				{
2100					e0 = clampedRGBA(blueContract(v0+v1,	v2+v3,	v4+v5,	v6+v7));
2101					e1 = clampedRGBA(blueContract(v0,		v2,		v4,		v6));
2102				}
2103
2104				break;
2105			}
2106
			// RGB decoded as in mode 11; alpha is taken unchanged from v[6] and v[7].
2107			case 14:
2108				decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
2109				e0.w() = v[6];
2110				e1.w() = v[7];
2111				break;
2112
2113			case 15:
2114				decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
2115				break;
2116
2117			default:
2118				DE_ASSERT(false);
2119		}
2120	}
2121}
2122
2123void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
2124{
2125	const int			colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
2126	ISEDecodedResult	colorEndpointData[18];
2127
2128	{
2129		BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
2130		decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
2131	}
2132
2133	{
2134		deUint32 unquantizedEndpoints[18];
2135		unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
2136		decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
2137	}
2138}
2139
2140void unquantizeWeights (deUint32* dst, const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
2141{
2142	const int			numWeights	= computeNumWeights(blockMode);
2143	const ISEParams&	iseParams	= blockMode.weightISEParams;
2144
2145	if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
2146	{
2147		const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
2148
2149		if (rangeCase == 0 || rangeCase == 1)
2150		{
2151			static const deUint32 map0[3]	= { 0, 32, 63 };
2152			static const deUint32 map1[5]	= { 0, 16, 32, 47, 63 };
2153			const deUint32* const map		= rangeCase == 0 ? &map0[0] : &map1[0];
2154			for (int i = 0; i < numWeights; i++)
2155			{
2156				DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
2157				dst[i] = map[weightGrid[i].v];
2158			}
2159		}
2160		else
2161		{
2162			DE_ASSERT(rangeCase <= 6);
2163			static const deUint32	Ca[5]	= { 50, 28, 23, 13, 11 };
2164			const deUint32			C		= Ca[rangeCase-2];
2165
2166			for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2167			{
2168				const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
2169				const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
2170				const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
2171
2172				const deUint32 A = a == 0 ? 0 : (1<<7)-1;
2173				const deUint32 B = rangeCase == 2 ? 0
2174								 : rangeCase == 3 ? 0
2175								 : rangeCase == 4 ? (b << 6) |					(b << 2) |				(b << 0)
2176								 : rangeCase == 5 ? (b << 6) |								(b << 1)
2177								 : rangeCase == 6 ? (c << 6) | (b << 5) |					(c << 1) |	(b << 0)
2178								 : (deUint32)-1;
2179
2180				dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
2181			}
2182		}
2183	}
2184	else
2185	{
2186		DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
2187
2188		for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2189			dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
2190	}
2191
2192	for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2193		dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
2194}
2195
2196void interpolateWeights (TexelWeightPair* dst, const deUint32* unquantizedWeights, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
2197{
2198	const int		numWeightsPerTexel	= blockMode.isDualPlane ? 2 : 1;
2199	const deUint32	scaleX				= (1024 + blockWidth/2) / (blockWidth-1);
2200	const deUint32	scaleY				= (1024 + blockHeight/2) / (blockHeight-1);
2201
2202	for (int texelY = 0; texelY < blockHeight; texelY++)
2203	{
2204		for (int texelX = 0; texelX < blockWidth; texelX++)
2205		{
2206			const deUint32 gX	= (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
2207			const deUint32 gY	= (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
2208			const deUint32 jX	= gX >> 4;
2209			const deUint32 jY	= gY >> 4;
2210			const deUint32 fX	= gX & 0xf;
2211			const deUint32 fY	= gY & 0xf;
2212			const deUint32 w11	= (fX*fY + 8) >> 4;
2213			const deUint32 w10	= fY - w11;
2214			const deUint32 w01	= fX - w11;
2215			const deUint32 w00	= 16 - fX - fY + w11;
2216			const deUint32 v0	= jY*blockMode.weightGridWidth + jX;
2217
2218			for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
2219			{
2220				const deUint32 p00	= unquantizedWeights[(v0)									* numWeightsPerTexel + texelWeightNdx];
2221				const deUint32 p01	= unquantizedWeights[(v0 + 1)								* numWeightsPerTexel + texelWeightNdx];
2222				const deUint32 p10	= unquantizedWeights[(v0 + blockMode.weightGridWidth)		* numWeightsPerTexel + texelWeightNdx];
2223				const deUint32 p11	= unquantizedWeights[(v0 + blockMode.weightGridWidth + 1)	* numWeightsPerTexel + texelWeightNdx];
2224
2225				dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
2226			}
2227		}
2228	}
2229}
2230
2231void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
2232{
2233	ISEDecodedResult weightGrid[64];
2234
2235	{
2236		BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
2237		decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
2238	}
2239
2240	{
2241		deUint32 unquantizedWeights[64];
2242		unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
2243		interpolateWeights(dst, &unquantizedWeights[0], blockWidth, blockHeight, blockMode);
2244	}
2245}
2246
2247inline deUint32 hash52 (deUint32 v)
2248{
2249	deUint32 p = v;
2250	p ^= p >> 15;	p -= p << 17;	p += p << 7;	p += p << 4;
2251	p ^= p >>  5;	p += p << 16;	p ^= p >> 7;	p ^= p >> 3;
2252	p ^= p <<  6;	p ^= p >> 17;
2253	return p;
2254}
2255
// Returns the partition index (0..3) of the texel at (xIn, yIn, zIn).
//
// The partition pattern is derived from hash52() of seedIn combined with numPartitions.
// Twelve 4-bit fields of the hash are squared and shifted down to form per-axis coefficients;
// four 6-bit values a..d are computed from them and the texel gets the index of the largest one
// (ties go to the lowest index). Only zIn == 0 is supported (asserted). When smallBlock is set,
// the coordinates are doubled first.
2256int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
2257{
2258	DE_ASSERT(zIn == 0);
2259	const deUint32	x		= smallBlock ? xIn << 1 : xIn;
2260	const deUint32	y		= smallBlock ? yIn << 1 : yIn;
2261	const deUint32	z		= smallBlock ? zIn << 1 : zIn;
	// Fold the partition count into the seed so that each count gets its own set of patterns.
2262	const deUint32	seed	= seedIn + 1024*(numPartitions-1);
2263	const deUint32	rnum	= hash52(seed);
	// \note The seeds are deliberately 8 bits wide: the squares below fit (15*15 = 225) and the later shifts operate on 8-bit values.
2264	deUint8			seed1	=  rnum							& 0xf;
2265	deUint8			seed2	= (rnum >>  4)					& 0xf;
2266	deUint8			seed3	= (rnum >>  8)					& 0xf;
2267	deUint8			seed4	= (rnum >> 12)					& 0xf;
2268	deUint8			seed5	= (rnum >> 16)					& 0xf;
2269	deUint8			seed6	= (rnum >> 20)					& 0xf;
2270	deUint8			seed7	= (rnum >> 24)					& 0xf;
2271	deUint8			seed8	= (rnum >> 28)					& 0xf;
2272	deUint8			seed9	= (rnum >> 18)					& 0xf;
2273	deUint8			seed10	= (rnum >> 22)					& 0xf;
2274	deUint8			seed11	= (rnum >> 26)					& 0xf;
2275	deUint8			seed12	= ((rnum >> 30) | (rnum << 2))	& 0xf;
2276
2277	seed1 *= seed1;		seed5 *= seed5;		seed9  *= seed9;
2278	seed2 *= seed2;		seed6 *= seed6;		seed10 *= seed10;
2279	seed3 *= seed3;		seed7 *= seed7;		seed11 *= seed11;
2280	seed4 *= seed4;		seed8 *= seed8;		seed12 *= seed12;
2281
	// Shift amounts depend on low bits of the seed and on the partition count.
	// sh1 is applied to the x coefficients, sh2 to the y coefficients and sh3 to the z coefficients.
2282	const int shA = (seed & 2) != 0		? 4		: 5;
2283	const int shB = numPartitions == 3	? 6		: 5;
2284	const int sh1 = (seed & 1) != 0		? shA	: shB;
2285	const int sh2 = (seed & 1) != 0		? shB	: shA;
2286	const int sh3 = (seed & 0x10) != 0	? sh1	: sh2;
2287
2288	seed1 >>= sh1;		seed2  >>= sh2;		seed3  >>= sh1;		seed4  >>= sh2;
2289	seed5 >>= sh1;		seed6  >>= sh2;		seed7  >>= sh1;		seed8  >>= sh2;
2290	seed9 >>= sh3;		seed10 >>= sh3;		seed11 >>= sh3;		seed12 >>= sh3;
2291
	// One 6-bit value per partition; values for partitions that do not exist are forced to 0.
2292	const int a =						0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
2293	const int b =						0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
2294	const int c = numPartitions >= 3 ?	0x3f & (seed5*x + seed6*y + seed9*z  + (rnum >>  6))	: 0;
2295	const int d = numPartitions >= 4 ?	0x3f & (seed7*x + seed8*y + seed10*z + (rnum >>  2))	: 0;
2296
	// Pick the largest value, preferring the lowest index on ties.
2297	return a >= b && a >= c && a >= d	? 0
2298		 : b >= c && b >= d				? 1
2299		 : c >= d						? 2
2300		 :								  3;
2301}
2302
// Computes the final color of every texel of a block and writes it to dst.
//
// dst is written as 4 deUint8 per texel when isSRGB is set and as 4 floats per texel otherwise,
// in row-major order. For each texel the endpoint pair of its partition is blended with the
// texel's weight; channel ccs (if any) uses the second weight of the pair. colorEndpointModes
// decides per partition whether the endpoints are blended as LDR or as HDR values.
2303void setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
2304							int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
2305{
	// Passed to computeTexelPartition(), which doubles the texel coordinates when it is set.
2306	const bool	smallBlock = blockWidth*blockHeight < 31;
2307	bool		isHDREndpoint[4];
2308
2309	for (int i = 0; i < numPartitions; i++)
2310		isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
2311
2312	for (int texelY = 0; texelY < blockHeight; texelY++)
2313	for (int texelX = 0; texelX < blockWidth; texelX++)
2314	{
2315		const int				texelNdx			= texelY*blockWidth + texelX;
2316		const int				colorEndpointNdx	= numPartitions == 1 ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
2317		DE_ASSERT(colorEndpointNdx < numPartitions);
2318		const UVec4&			e0					= colorEndpoints[colorEndpointNdx].e0;
2319		const UVec4&			e1					= colorEndpoints[colorEndpointNdx].e1;
2320		const TexelWeightPair&	weight				= texelWeights[texelNdx];
2321
		// An HDR endpoint mode while decoding in LDR mode: write a fixed color (full red, blue and alpha, zero green) instead.
2322		if (isLDRMode && isHDREndpoint[colorEndpointNdx])
2323		{
2324			if (isSRGB)
2325			{
2326				((deUint8*)dst)[texelNdx*4 + 0] = 0xff;
2327				((deUint8*)dst)[texelNdx*4 + 1] = 0;
2328				((deUint8*)dst)[texelNdx*4 + 2] = 0xff;
2329				((deUint8*)dst)[texelNdx*4 + 3] = 0xff;
2330			}
2331			else
2332			{
2333				((float*)dst)[texelNdx*4 + 0] = 1.0f;
2334				((float*)dst)[texelNdx*4 + 1] = 0;
2335				((float*)dst)[texelNdx*4 + 2] = 1.0f;
2336				((float*)dst)[texelNdx*4 + 3] = 1.0f;
2337			}
2338		}
2339		else
2340		{
2341			for (int channelNdx = 0; channelNdx < 4; channelNdx++)
2342			{
2343				if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
2344				{
					// LDR path: expand the 8-bit endpoints to 16 bits (low byte 0x80 for sRGB, replicated otherwise) and blend with the 0..64 weight.
2345					const deUint32 c0	= (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
2346					const deUint32 c1	= (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
2347					const deUint32 w	= weight.w[ccs == channelNdx ? 1 : 0];
2348					const deUint32 c	= (c0*(64-w) + c1*w + 32) / 64;
2349
					// sRGB keeps the top 8 bits; linear output maps 65535 to exactly 1.0 and everything else to c/65536.
2350					if (isSRGB)
2351						((deUint8*)dst)[texelNdx*4 + channelNdx] = (c & 0xff00) >> 8;
2352					else
2353						((float*)dst)[texelNdx*4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
2354				}
2355				else
2356				{
					// HDR path: blend the 12-bit endpoints as 16-bit values, then build a 16-bit float from
					// the top 5 bits (exponent) and a piecewise-linear remap of the low 11 bits (mantissa).
2357					DE_STATIC_ASSERT((isSameType<deFloat16, deUint16>::V));
2358					const deUint32		c0	= e0[channelNdx] << 4;
2359					const deUint32		c1	= e1[channelNdx] << 4;
2360					const deUint32		w	= weight.w[ccs == channelNdx ? 1 : 0];
2361					const deUint32		c	= (c0*(64-w) + c1*w + 32) / 64;
2362					const deUint32		e	= getBits(c, 11, 15);
2363					const deUint32		m	= getBits(c, 0, 10);
2364					const deUint32		mt	= m < 512		? 3*m
2365											: m >= 1536		? 5*m - 2048
2366											:				  4*m - 512;
2367					const deFloat16		cf	= (e << 10) + (mt >> 3);
2368
					// Inf and NaN bit patterns are replaced by 0x7bff before conversion to 32-bit float.
2369					((float*)dst)[texelNdx*4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
2370				}
2371			}
2372		}
2373	}
2374}
2375
// Decompresses one 128-bit ASTC block of blockWidth x blockHeight texels into dst.
//
// The decoded colors are written by setTexelColors() (4 deUint8 per texel when isSRGB is set,
// 4 floats per texel otherwise). Invalid blocks are filled via setASTCErrorColorBlock() and
// void-extent blocks are handled by decodeVoidExtentBlock(). sRGB decoding requires isLDR (asserted).
2376void decompressASTCBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
2377{
2378	DE_ASSERT(isLDR || !isSRGB);
2379
2380	// Decode block mode.
2381
2382	const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
2383
2384	// Check for block mode errors.
2385
2386	if (blockMode.isError)
2387	{
2388		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2389		return;
2390	}
2391
2392	// Separate path for void-extent.
2393
2394	if (blockMode.isVoidExtent)
2395	{
2396		decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
2397		return;
2398	}
2399
2400	// Compute weight grid values.
2401
2402	const int numWeights			= computeNumWeights(blockMode);
2403	const int numWeightDataBits		= computeNumRequiredBits(blockMode.weightISEParams, numWeights);
2404	const int numPartitions			= (int)blockData.getBits(11, 12) + 1;
2405
2406	// Check for errors in weight grid, partition and dual-plane parameters.
2407
2408	if (numWeights > 64								||
2409		numWeightDataBits > 96						||
2410		numWeightDataBits < 24						||
2411		blockMode.weightGridWidth > blockWidth		||
2412		blockMode.weightGridHeight > blockHeight	||
2413		(numPartitions == 4 && blockMode.isDualPlane))
2414	{
2415		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2416		return;
2417	}
2418
2419	// Compute number of bits available for color endpoint data.
2420
	// \note Config data is 17 bits for one partition, 29 bits for several partitions sharing one endpoint mode,
	//		 and 25 + 3*numPartitions bits otherwise; dual-plane blocks use 2 more bits.
	//		 extraCemBitsStart is the bit position handed to decodeColorEndpointModes(); the two bits just
	//		 below it are read as ccs further down.
2421	const bool	isSingleUniqueCem			= numPartitions == 1 || blockData.getBits(23, 24) == 0;
2422	const int	numConfigDataBits			= (numPartitions == 1 ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
2423											  (blockMode.isDualPlane ? 2 : 0);
2424	const int	numBitsForColorEndpoints	= 128 - numWeightDataBits - numConfigDataBits;
2425	const int	extraCemBitsStart			= 127 - numWeightDataBits - (isSingleUniqueCem		? -1
2426																		: numPartitions == 4	? 7
2427																		: numPartitions == 3	? 4
2428																		: numPartitions == 2	? 1
2429																		: 0);
2430	// Decode color endpoint modes.
2431
2432	deUint32 colorEndpointModes[4];
2433	decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
2434
2435	const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
2436
2437	// Check for errors in color endpoint value count.
2438
	// \note At most 18 values are supported, and at least ceil(13*n/5) bits must be available for n values.
2439	if (numColorEndpointValues > 18 || numBitsForColorEndpoints < divRoundUp(13*numColorEndpointValues, 5))
2440	{
2441		setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2442		return;
2443	}
2444
2445	// Compute color endpoints.
2446
2447	ColorEndpointPair colorEndpoints[4];
2448	computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
2449						  computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
2450
2451	// Compute texel weights.
2452
2453	TexelWeightPair texelWeights[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT];
2454	computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
2455
2456	// Set texel colors.
2457
	// \note ccs is the channel that uses the second weight plane (-1 when not dual-plane).
	//		 The partition seed is read only when there is more than one partition.
2458	const int		ccs						= blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
2459	const deUint32	partitionIndexSeed		= numPartitions > 1 ? blockData.getBits(13, 22) : (deUint32)-1;
2460
2461	setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
2462}
2463
2464} // ASTCDecompressInternal
2465
2466void decompressASTC (const PixelBufferAccess& dst, const deUint8* data, bool isSRGB, bool isLDR)
2467{
2468	using namespace ASTCDecompressInternal;
2469
2470	DE_ASSERT(isLDR || !isSRGB);
2471
2472	const int blockWidth = dst.getWidth();
2473	const int blockHeight = dst.getHeight();
2474
2475	union
2476	{
2477		deUint8		sRGB[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
2478		float		linear[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
2479	} decompressedBuffer;
2480
2481	const Block128 blockData(data);
2482	decompressASTCBlock(isSRGB ? (void*)&decompressedBuffer.sRGB[0] : (void*)&decompressedBuffer.linear[0],
2483						blockData, dst.getWidth(), dst.getHeight(), isSRGB, isLDR);
2484
2485	if (isSRGB)
2486	{
2487		for (int i = 0; i < blockHeight; i++)
2488		for (int j = 0; j < blockWidth; j++)
2489		{
2490			dst.setPixel(IVec4(decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 0],
2491									decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 1],
2492									decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 2],
2493									decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 3]), j, i);
2494		}
2495	}
2496	else
2497	{
2498		for (int i = 0; i < blockHeight; i++)
2499		for (int j = 0; j < blockWidth; j++)
2500		{
2501			dst.setPixel(Vec4(decompressedBuffer.linear[(i*blockWidth + j) * 4 + 0],
2502								   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 1],
2503								   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 2],
2504								   decompressedBuffer.linear[(i*blockWidth + j) * 4 + 3]), j, i);
2505		}
2506	}
2507}
2508
// Decompresses one compressed block from src into dst using the decoder that matches format.
//
// dst must have depth 1 (asserted). sRGB variants of the ETC2 formats are decoded by the same
// routine as their non-sRGB counterparts. params is consulted only for ASTC formats, where
// params.astcMode selects LDR or HDR decoding; ASTC sRGB formats are always decoded as LDR.
2509void decompressBlock (CompressedTexFormat format, const PixelBufferAccess& dst, const deUint8* src, const TexDecompressionParams& params)
2510{
2511	// No 3D blocks supported right now
2512	DE_ASSERT(dst.getDepth() == 1);
2513
2514	switch (format)
2515	{
2516		case COMPRESSEDTEXFORMAT_ETC1_RGB8:							decompressETC1							(dst, src);			break;
2517		case COMPRESSEDTEXFORMAT_EAC_R11:							decompressEAC_R11						(dst, src, false);	break;
2518		case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:					decompressEAC_R11						(dst, src, true);	break;
2519		case COMPRESSEDTEXFORMAT_EAC_RG11:							decompressEAC_RG11						(dst, src, false);	break;
2520		case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:					decompressEAC_RG11						(dst, src, true);	break;
2521		case COMPRESSEDTEXFORMAT_ETC2_RGB8:							decompressETC2							(dst, src);			break;
2522		case COMPRESSEDTEXFORMAT_ETC2_SRGB8:						decompressETC2							(dst, src);			break;
2523		case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:		decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1	(dst, src);			break;
2524		case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:	decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1	(dst, src);			break;
2525		case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:					decompressETC2_EAC_RGBA8				(dst, src);			break;
2526		case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:				decompressETC2_EAC_RGBA8				(dst, src);			break;
2527
		// All ASTC block sizes share one decoder; the block size is taken from the dimensions of dst.
2528		case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
2529		case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
2530		case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
2531		case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
2532		case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
2533		case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
2534		case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
2535		case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
2536		case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
2537		case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
2538		case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
2539		case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
2540		case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
2541		case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
2542		case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
2543		case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
2544		case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
2545		case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
2546		case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
2547		case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
2548		case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
2549		case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
2550		case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
2551		case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
2552		case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
2553		case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
2554		case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
2555		case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
2556		{
2557			DE_ASSERT(params.astcMode == TexDecompressionParams::ASTCMODE_LDR || params.astcMode == TexDecompressionParams::ASTCMODE_HDR);
2558
			// sRGB formats force LDR decoding regardless of params.astcMode.
2559			const bool isSRGBFormat = isAstcSRGBFormat(format);
2560			decompressASTC(dst, src, isSRGBFormat, isSRGBFormat || params.astcMode == TexDecompressionParams::ASTCMODE_LDR);
2561
2562			break;
2563		}
2564
2565		default:
2566			DE_ASSERT(false);
2567			break;
2568	}
2569}
2570
2571int componentSum (const IVec3& vec)
2572{
2573	return vec.x() + vec.y() + vec.z();
2574}
2575
2576} // anonymous
2577
2578void decompress (const PixelBufferAccess& dst, CompressedTexFormat fmt, const deUint8* src, const TexDecompressionParams& params)
2579{
2580	const int				blockSize			= getBlockSize(fmt);
2581	const IVec3				blockPixelSize		(getBlockPixelSize(fmt));
2582	const IVec3				blockCount			(divRoundUp(dst.getWidth(),		blockPixelSize.x()),
2583												 divRoundUp(dst.getHeight(),	blockPixelSize.y()),
2584												 divRoundUp(dst.getDepth(),		blockPixelSize.z()));
2585	const IVec3				blockPitches		(blockSize, blockSize * blockCount.x(), blockSize * blockCount.x() * blockCount.y());
2586
2587	std::vector<deUint8>	uncompressedBlock	(dst.getFormat().getPixelSize() * blockPixelSize.x() * blockPixelSize.y() * blockPixelSize.z());
2588	const PixelBufferAccess	blockAccess			(getUncompressedFormat(fmt), blockPixelSize.x(), blockPixelSize.y(), blockPixelSize.z(), &uncompressedBlock[0]);
2589
2590	DE_ASSERT(dst.getFormat() == getUncompressedFormat(fmt));
2591
2592	for (int blockZ = 0; blockZ < blockCount.z(); blockZ++)
2593	for (int blockY = 0; blockY < blockCount.y(); blockY++)
2594	for (int blockX = 0; blockX < blockCount.x(); blockX++)
2595	{
2596		const IVec3				blockPos	(blockX, blockY, blockZ);
2597		const deUint8* const	blockPtr	= src + componentSum(blockPos * blockPitches);
2598		const IVec3				copySize	(de::min(blockPixelSize.x(), dst.getWidth()		- blockPos.x() * blockPixelSize.x()),
2599											 de::min(blockPixelSize.y(), dst.getHeight()	- blockPos.y() * blockPixelSize.y()),
2600											 de::min(blockPixelSize.z(), dst.getDepth()		- blockPos.z() * blockPixelSize.z()));
2601		const IVec3				dstPixelPos	= blockPos * blockPixelSize;
2602
2603		decompressBlock(fmt, blockAccess, blockPtr, params);
2604
2605		copyRawPixels(getSubregion(dst, dstPixelPos.x(), dstPixelPos.y(), dstPixelPos.z(), copySize.x(), copySize.y(), copySize.z()), getSubregion(blockAccess, 0, 0, 0, copySize.x(), copySize.y(), copySize.z()));
2606	}
2607}
2608
2609CompressedTexture::CompressedTexture (void)
2610	: m_format	(COMPRESSEDTEXFORMAT_LAST)
2611	, m_width	(0)
2612	, m_height	(0)
2613	, m_depth	(0)
2614{
2615}
2616
2617CompressedTexture::CompressedTexture (CompressedTexFormat format, int width, int height, int depth)
2618	: m_format	(COMPRESSEDTEXFORMAT_LAST)
2619	, m_width	(0)
2620	, m_height	(0)
2621	, m_depth	(0)
2622{
2623	setStorage(format, width, height, depth);
2624}
2625
2626CompressedTexture::~CompressedTexture (void)
2627{
2628}
2629
2630void CompressedTexture::setStorage (CompressedTexFormat format, int width, int height, int depth)
2631{
2632	m_format	= format;
2633	m_width		= width;
2634	m_height	= height;
2635	m_depth		= depth;
2636
2637	if (isAstcFormat(m_format) && m_depth > 1)
2638		throw InternalError("3D ASTC textures not currently supported");
2639
2640	if (m_format != COMPRESSEDTEXFORMAT_LAST)
2641	{
2642		const IVec3	blockPixelSize	= getBlockPixelSize(m_format);
2643		const int	blockSize		= getBlockSize(m_format);
2644
2645		m_data.resize(divRoundUp(m_width, blockPixelSize.x()) * divRoundUp(m_height, blockPixelSize.y()) * divRoundUp(m_depth, blockPixelSize.z()) * blockSize);
2646	}
2647	else
2648	{
2649		DE_ASSERT(m_format == COMPRESSEDTEXFORMAT_LAST);
2650		DE_ASSERT(m_width == 0 && m_height == 0 && m_depth == 0);
2651		m_data.resize(0);
2652	}
2653}
2654
2655/*--------------------------------------------------------------------*//*!
2656 * \brief Decode to uncompressed pixel data
2657 * \param dst Destination buffer
2658 *//*--------------------------------------------------------------------*/
2659void CompressedTexture::decompress (const PixelBufferAccess& dst, const TexDecompressionParams& params) const
2660{
2661	DE_ASSERT(dst.getWidth() == m_width && dst.getHeight() == m_height && dst.getDepth() == m_depth);
2662	DE_ASSERT(dst.getFormat() == getUncompressedFormat(m_format));
2663
2664	tcu::decompress(dst, m_format, &m_data[0], params);
2665}
2666
2667} // tcu
2668