1#ifndef _TCUFLOAT_HPP
2#define _TCUFLOAT_HPP
3/*-------------------------------------------------------------------------
4 * drawElements Quality Program Tester Core
5 * ----------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Reconfigurable floating-point value template.
24 *//*--------------------------------------------------------------------*/
25
26#include "tcuDefs.hpp"
27
28// For memcpy().
29#include <string.h>
30
31namespace tcu
32{
33
//! Capability bits of a Float<> format; combine with bitwise OR and pass as the Flags template argument.
enum FloatFlags
{
	FLOAT_HAS_SIGN			= 0x1,	//!< Format can represent negative values (sign bit above the exponent field).
	FLOAT_SUPPORT_DENORM	= 0x2	//!< Conversions into the format may produce denormalized results.
};
39
40/*--------------------------------------------------------------------*//*!
41 * \brief Floating-point format template
42 *
43 * This template implements arbitrary floating-point handling. Template
44 * can be used for conversion between different formats and checking
45 * various properties of floating-point values.
46 *//*--------------------------------------------------------------------*/
47template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
48class Float
49{
50public:
51	typedef StorageType_ StorageType;
52
53	enum
54	{
55		EXPONENT_BITS	= ExponentBits,
56		MANTISSA_BITS	= MantissaBits,
57		EXPONENT_BIAS	= ExponentBias,
58		FLAGS			= Flags,
59	};
60
61							Float			(void);
62	explicit				Float			(StorageType value);
63	explicit				Float			(float v);
64	explicit				Float			(double v);
65
66	template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
67	static Float			convert			(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
68
69	static inline Float		convert			(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
70
71	/*--------------------------------------------------------------------*//*!
72	 * \brief Construct floating point value
73	 * \param sign		Sign. Must be +1/-1
74	 * \param exponent	Exponent in range [1-ExponentBias, ExponentBias+1]
75	 * \param mantissa	Mantissa bits with implicit leading bit explicitly set
76	 * \return The specified float
77	 *
78	 * This function constructs a floating point value from its inputs.
79	 * The normally implicit leading bit of the mantissa must be explicitly set.
80	 * The exponent normally used for zero/subnormals is an invalid input. Such
81	 * values are specified with the leading mantissa bit of zero and the lowest
82	 * normal exponent (1-ExponentBias). Additionally having both exponent and
83	 * mantissa set to zero is a shorthand notation for the correctly signed
84	 * floating point zero. Inf and NaN must be specified directly with an
85	 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
86	 * bit set)
87	 *//*--------------------------------------------------------------------*/
88	static inline Float		construct		(int sign, int exponent, StorageType mantissa);
89
90	/*--------------------------------------------------------------------*//*!
91	 * \brief Construct floating point value. Explicit version
92	 * \param sign		Sign. Must be +1/-1
93	 * \param exponent	Exponent in range [-ExponentBias, ExponentBias+1]
94	 * \param mantissa	Mantissa bits
95	 * \return The specified float
96	 *
97	 * This function constructs a floating point value from its inputs with
98	 * minimal intervention.
99	 * The sign is turned into a sign bit and the exponent bias is added.
100	 * See IEEE-754 for additional information on the inputs and
101	 * the encoding of special values.
102	 *//*--------------------------------------------------------------------*/
103	static Float			constructBits	(int sign, int exponent, StorageType mantissaBits);
104
105	StorageType				bits			(void) const	{ return m_value;															}
106	float					asFloat			(void) const;
107	double					asDouble		(void) const;
108
109	inline int				signBit			(void) const	{ return (m_value >> (ExponentBits+MantissaBits)) & 1;						}
110	inline StorageType		exponentBits	(void) const	{ return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);	}
111	inline StorageType		mantissaBits	(void) const	{ return m_value & ((StorageType(1)<<MantissaBits)-1);						}
112
113	inline int				sign			(void) const	{ return signBit() ? -1 : 1;																			}
114	inline int				exponent		(void) const	{ return isDenorm() ? 1	- ExponentBias : (int)exponentBits() - ExponentBias;							}
115	inline StorageType		mantissa		(void) const	{ return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));	}
116
117	inline bool				isInf			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() == 0;	}
118	inline bool				isNaN			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() != 0;	}
119	inline bool				isZero			(void) const	{ return exponentBits() == 0						&& mantissaBits() == 0;	}
120	inline bool				isDenorm		(void) const	{ return exponentBits() == 0						&& mantissaBits() != 0;	}
121
122	static Float			zero			(int sign);
123	static Float			inf				(int sign);
124	static Float			nan				(void);
125
126private:
127	StorageType				m_value;
128};
129
130// Common floating-point types.
// Template arguments, in order: storage type, exponent bits, mantissa bits, exponent bias, FloatFlags.
// All three formats carry a sign bit and allow denormalized values.
typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float16;	//!< IEEE 754-2008 16-bit floating-point value (5 exponent bits, 10 mantissa bits, bias 15)
typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float32;	//!< IEEE 754 32-bit floating-point value (8 exponent bits, 23 mantissa bits, bias 127)
typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float64;	//!< IEEE 754 64-bit floating-point value (11 exponent bits, 52 mantissa bits, bias 1023)
134
135template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
136inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
137	: m_value(0)
138{
139}
140
141template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
142inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
143	: m_value(value)
144{
145}
146
147template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
148inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
149	: m_value(0)
150{
151	deUint32 u32;
152	memcpy(&u32, &value, sizeof(deUint32));
153	*this = convert(Float32(u32));
154}
155
156template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
157inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
158	: m_value(0)
159{
160	deUint64 u64;
161	memcpy(&u64, &value, sizeof(deUint64));
162	*this = convert(Float64(u64));
163}
164
165template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
166inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
167{
168	float		v;
169	deUint32	u32		= Float32::convert(*this).bits();
170	memcpy(&v, &u32, sizeof(deUint32));
171	return v;
172}
173
174template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
175inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
176{
177	double		v;
178	deUint64	u64		= Float64::convert(*this).bits();
179	memcpy(&v, &u64, sizeof(deUint64));
180	return v;
181}
182
183template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
184inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
185{
186	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
187	return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
188}
189
190template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
191inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
192{
193	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
194	return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
195}
196
197template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
198inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
199{
200	return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
201}
202
203template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
204Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
205Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
206	(int sign, int exponent, StorageType mantissa)
207{
208	// Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
209	const bool			isShorthandZero	= exponent == 0 && mantissa == 0;
210
211	// Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
212	// Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
213	const bool			isDenormOrZero	= (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
214	const StorageType	s				= StorageType(sign < 0 ? 1 : 0) << (ExponentBits+MantissaBits);
215	const StorageType	exp				= (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
216
217	DE_ASSERT(sign == +1 || sign == -1);
218	DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
219	DE_ASSERT(exp >> ExponentBits == 0);
220
221	return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
222}
223
224template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
225Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
226Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
227	(int sign, int exponent, StorageType mantissaBits)
228{
229	const StorageType signBit		= sign < 0 ? 1 : 0;
230	const StorageType exponentBits	= exponent + ExponentBias;
231
232	DE_ASSERT(sign == +1 || sign == -1 );
233	DE_ASSERT(exponentBits >> ExponentBits == 0);
234	DE_ASSERT(mantissaBits >> MantissaBits == 0);
235
236	return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
237}
238
239template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
240template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
241Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
242Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
243	(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
244{
245	if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
246	{
247		// Negative number, truncate to zero.
248		return zero(+1);
249	}
250	else if (other.isInf())
251	{
252		return inf(other.sign());
253	}
254	else if (other.isNaN())
255	{
256		return nan();
257	}
258	else if (other.isZero())
259	{
260		return zero(other.sign());
261	}
262	else
263	{
264		const int			eMin	= 1 - ExponentBias;
265		const int			eMax	= ((1<<ExponentBits)-2) - ExponentBias;
266
267		const StorageType	s		= StorageType(other.signBit()) << (ExponentBits+MantissaBits); // \note Not sign, but sign bit.
268		int					e		= other.exponent();
269		deUint64			m		= other.mantissa();
270
271		// Normalize denormalized values prior to conversion.
272		while (!(m & (1ull<<OtherMantissaBits)))
273		{
274			m <<= 1;
275			e  -= 1;
276		}
277
278		if (e < eMin)
279		{
280			// Underflow.
281			if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
282			{
283				// Shift and round (RTE).
284				int			bitDiff	= (OtherMantissaBits-MantissaBits) + (eMin-e);
285				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
286				deUint64	bias	= (m >> bitDiff) & 1;
287
288				return Float(StorageType(s | (m + half + bias) >> bitDiff));
289			}
290			else
291				return zero(other.sign());
292		}
293		else
294		{
295			// Remove leading 1.
296			m = m & ~(1ull<<OtherMantissaBits);
297
298			if (MantissaBits < OtherMantissaBits)
299			{
300				// Round mantissa (round to nearest even).
301				int			bitDiff	= OtherMantissaBits-MantissaBits;
302				deUint64	half	= (1ull << (bitDiff - 1)) - 1;
303				deUint64	bias	= (m >> bitDiff) & 1;
304
305				m = (m + half + bias) >> bitDiff;
306
307				if (m & (1ull<<MantissaBits))
308				{
309					// Overflow in mantissa.
310					m  = 0;
311					e += 1;
312				}
313			}
314			else
315			{
316				int bitDiff = MantissaBits-OtherMantissaBits;
317				m = m << bitDiff;
318			}
319
320			if (e > eMax)
321			{
322				// Overflow.
323				return inf(other.sign());
324			}
325			else
326			{
327				DE_ASSERT(de::inRange(e, eMin, eMax));
328				DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
329				DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
330
331				return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
332			}
333		}
334	}
335}
336
337} // tcu
338
339#endif // _TCUFLOAT_HPP
340