1/*-------------------------------------------------------------------------
2 * drawElements Quality Program Tester Core
3 * ----------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Adjustable-precision floating point operations.
22 *//*--------------------------------------------------------------------*/
23
24#include "tcuFloatFormat.hpp"
25
26#include "deMath.h"
27#include "deUniquePtr.hpp"
28
29#include <sstream>
30#include <iomanip>
31#include <limits>
32
33namespace tcu
34{
35namespace
36{
37
38Interval chooseInterval(YesNoMaybe choice, const Interval& no, const Interval& yes)
39{
40	switch (choice)
41	{
42		case NO:	return no;
43		case YES:	return yes;
44		case MAYBE:	return no | yes;
45		default:	DE_ASSERT(!"Impossible case");
46	}
47
48	return Interval();
49}
50
51double computeMaxValue (int maxExp, int fractionBits)
52{
53	return (deLdExp(1.0, maxExp) +
54			deLdExp(double((1ull << fractionBits) - 1), maxExp - fractionBits));
55}
56
57} // anonymous
58
59FloatFormat::FloatFormat (int			minExp,
60						  int			maxExp,
61						  int			fractionBits,
62						  bool			exactPrecision,
63						  YesNoMaybe	hasSubnormal_,
64						  YesNoMaybe	hasInf,
65						  YesNoMaybe	hasNaN)
66	: m_minExp			(minExp)
67	, m_maxExp			(maxExp)
68	, m_fractionBits	(fractionBits)
69	, m_hasSubnormal	(hasSubnormal_)
70	, m_hasInf			(hasInf)
71	, m_hasNaN			(hasNaN)
72	, m_exactPrecision	(exactPrecision)
73	, m_maxValue		(computeMaxValue(maxExp, fractionBits))
74{
75	DE_ASSERT(minExp <= maxExp);
76}
77
78/*-------------------------------------------------------------------------
79 * On the definition of ULP
80 *
81 * The GLSL spec does not define ULP. However, it refers to IEEE 754, which
82 * (reportedly) uses Harrison's definition:
83 *
84 * ULP(x) is the distance between the closest floating point numbers
85 * a and be such that a <= x <= b and a != b
86 *
87 * Note that this means that when x = 2^n, ULP(x) = 2^(n-p-1), i.e. it is the
88 * distance to the next lowest float, not next highest.
89 *
90 * Furthermore, it is assumed that ULP is calculated relative to the exact
91 * value, not the approximation. This is because otherwise a less accurate
92 * approximation could be closer in ULPs, because its ULPs are bigger.
93 *
94 * For details, see "On the definition of ulp(x)" by Jean-Michel Muller
95 *
96 *-----------------------------------------------------------------------*/
97
98double FloatFormat::ulp (double x, double count) const
99{
100	int				exp		= 0;
101	const double	frac	= deFractExp(deAbs(x), &exp);
102
103	if (deIsNaN(frac))
104		return TCU_NAN;
105	else if (deIsInf(frac))
106		return deLdExp(1.0, m_maxExp - m_fractionBits);
107	else if (frac == 1.0)
108	{
109		// Harrison's ULP: choose distance to closest (i.e. next lower) at binade
110		// boundary.
111		--exp;
112	}
113	else if (frac == 0.0)
114		exp = m_minExp;
115
116	// ULP cannot be lower than the smallest quantum.
117	exp = de::max(exp, m_minExp);
118
119	{
120		const double		oneULP	= deLdExp(1.0, exp - m_fractionBits);
121		ScopedRoundingMode	ctx		(DE_ROUNDINGMODE_TO_POSITIVE_INF);
122
123		return oneULP * count;
124	}
125}
126
127//! Return the difference between the given nominal exponent and
128//! the exponent of the lowest significand bit of the
129//! representation of a number with this format.
130//! For normal numbers this is the number of significand bits, but
131//! for subnormals it is less and for values of exp where 2^exp is too
132//! small to represent it is <0
133int FloatFormat::exponentShift (int exp) const
134{
135	return m_fractionBits - de::max(m_minExp - exp, 0);
136}
137
138//! Return the number closest to `d` that is exactly representable with the
139//! significand bits and minimum exponent of the floatformat. Round up if
140//! `upward` is true, otherwise down.
141double FloatFormat::round (double d, bool upward) const
142{
143	int				exp			= 0;
144	const double	frac		= deFractExp(d, &exp);
145	const int		shift		= exponentShift(exp);
146	const double	shiftFrac	= deLdExp(frac, shift);
147	const double	roundFrac	= upward ? deCeil(shiftFrac) : deFloor(shiftFrac);
148
149	return deLdExp(roundFrac, exp - shift);
150}
151
152//! Return the range of numbers that `d` might be converted to in the
153//! floatformat, given its limitations with infinities, subnormals and maximum
154//! exponent.
155Interval FloatFormat::clampValue (double d) const
156{
157	const double	rSign		= deSign(d);
158	int				rExp		= 0;
159
160	DE_ASSERT(!deIsNaN(d));
161
162	deFractExp(d, &rExp);
163	if (rExp < m_minExp)
164		return chooseInterval(m_hasSubnormal, rSign * 0.0, d);
165	else if (deIsInf(d) || rExp > m_maxExp)
166		return chooseInterval(m_hasInf, rSign * getMaxValue(), rSign * TCU_INFINITY);
167
168	return Interval(d);
169}
170
171//! Return the range of numbers that might be used with this format to
172//! represent a number within `x`.
173Interval FloatFormat::convert (const Interval& x) const
174{
175	Interval ret;
176	Interval tmp = x;
177
178	if (x.hasNaN())
179	{
180		// If NaN might be supported, NaN is a legal return value
181		if (m_hasNaN != NO)
182			ret |= TCU_NAN;
183
184		// If NaN might not be supported, any (non-NaN) value is legal,
185		// _subject_ to clamping. Hence we modify tmp, not ret.
186		if (m_hasNaN != YES)
187			tmp = Interval::unbounded();
188	}
189
190	// Round both bounds _inwards_ to closest representable values.
191	if (!tmp.empty())
192		ret |= clampValue(round(tmp.lo(), true)) | clampValue(round(tmp.hi(), false));
193
194	// If this format's precision is not exact, the (possibly out-of-bounds)
195	// original value is also a possible result.
196	if (!m_exactPrecision)
197		ret |= x;
198
199	return ret;
200}
201
202double FloatFormat::roundOut (double d, bool upward, bool roundUnderOverflow) const
203{
204	int	exp	= 0;
205	deFractExp(d, &exp);
206
207	if (roundUnderOverflow && exp > m_maxExp && (upward == (d < 0.0)))
208		return deSign(d) * getMaxValue();
209	else
210		return round(d, upward);
211}
212
213//! Round output of an operation.
214//! \param roundUnderOverflow Can +/-inf rounded to min/max representable;
215//!							  should be false if any of operands was inf, true otherwise.
216Interval FloatFormat::roundOut (const Interval& x, bool roundUnderOverflow) const
217{
218	Interval ret = x.nan();
219
220	if (!x.empty())
221		ret |= Interval(roundOut(x.lo(), false, roundUnderOverflow),
222						roundOut(x.hi(), true, roundUnderOverflow));
223
224	return ret;
225}
226
227std::string	FloatFormat::floatToHex	(double x) const
228{
229	if (deIsNaN(x))
230		return "NaN";
231	else if (deIsInf(x))
232		return (x < 0.0 ? "-" : "+") + std::string("inf");
233	else if (x == 0.0) // \todo [2014-03-27 lauri] Negative zero
234		return "0.0";
235
236	int					exp			= 0;
237	const double		frac		= deFractExp(deAbs(x), &exp);
238	const int			shift		= exponentShift(exp);
239	const deUint64		bits		= deUint64(deLdExp(frac, shift));
240	const deUint64		whole		= bits >> m_fractionBits;
241	const deUint64		fraction	= bits & ((deUint64(1) << m_fractionBits) - 1);
242	const int			exponent	= exp + m_fractionBits - shift;
243	const int			numDigits	= (m_fractionBits + 3) / 4;
244	const deUint64		aligned		= fraction << (numDigits * 4 - m_fractionBits);
245	std::ostringstream	oss;
246
247	oss << (x < 0 ? "-" : "")
248		<< "0x" << whole << "."
249		<< std::hex << std::setw(numDigits) << std::setfill('0') << aligned
250		<< "p" << std::dec << std::setw(0) << exponent;
251
252	return oss.str();
253}
254
255std::string FloatFormat::intervalToHex (const Interval& interval) const
256{
257	if (interval.empty())
258		return interval.hasNaN() ? "{ NaN }" : "{}";
259
260	else if (interval.lo() == interval.hi())
261		return (std::string(interval.hasNaN() ? "{ NaN, " : "{ ") +
262				floatToHex(interval.lo()) + " }");
263	else if (interval == Interval::unbounded(true))
264		return "<any>";
265
266	return (std::string(interval.hasNaN() ? "{ NaN } | " : "") +
267			"[" + floatToHex(interval.lo()) + ", " + floatToHex(interval.hi()) + "]");
268}
269
270template <typename T>
271static FloatFormat nativeFormat (void)
272{
273	typedef std::numeric_limits<T> Limits;
274
275	DE_ASSERT(Limits::radix == 2);
276
277	return FloatFormat(Limits::min_exponent - 1,	// These have a built-in offset of one
278					   Limits::max_exponent - 1,
279					   Limits::digits - 1,			// don't count the hidden bit
280					   Limits::has_denorm != std::denorm_absent,
281					   Limits::has_infinity ? YES : NO,
282					   Limits::has_quiet_NaN ? YES : NO,
283					   ((Limits::has_denorm == std::denorm_present) ? YES :
284						(Limits::has_denorm == std::denorm_absent) ? NO :
285						MAYBE));
286}
287
288FloatFormat	FloatFormat::nativeFloat (void)
289{
290	return nativeFormat<float>();
291}
292
293FloatFormat	FloatFormat::nativeDouble (void)
294{
295	return nativeFormat<double>();
296}
297
298namespace
299{
300
301using std::string;
302using std::ostringstream;
303using de::MovePtr;
304using de::UniquePtr;
305
306class Test
307{
308protected:
309
310							Test		(MovePtr<FloatFormat> fmt) : m_fmt(fmt) {}
311	double					p			(int e) const	 			{ return deLdExp(1.0, e); }
312	void					check		(const string&	expr,
313										 double			result,
314										 double			reference) const;
315	void					testULP		(double arg, double ref) const;
316	void					testRound	(double arg, double refDown, double refUp) const;
317
318	UniquePtr<FloatFormat>	m_fmt;
319};
320
321void Test::check (const string& expr, double result, double reference) const
322{
323	if (result != reference)
324	{
325		ostringstream oss;
326		oss << expr << " returned " << result << ", expected " << reference;
327		TCU_FAIL(oss.str().c_str());
328	}
329}
330
331void Test::testULP (double arg, double ref) const
332{
333	ostringstream	oss;
334
335	oss << "ulp(" << arg << ")";
336	check(oss.str(), m_fmt->ulp(arg), ref);
337}
338
339void Test::testRound (double arg, double refDown, double refUp) const
340{
341	{
342		ostringstream oss;
343		oss << "round(" << arg << ", false)";
344		check(oss.str(), m_fmt->round(arg, false), refDown);
345	}
346	{
347		ostringstream oss;
348		oss << "round(" << arg << ", true)";
349		check(oss.str(), m_fmt->round(arg, true), refUp);
350	}
351}
352
353class TestBinary32 : public Test
354{
355public:
356			TestBinary32 (void)
357				: Test (MovePtr<FloatFormat>(new FloatFormat(-126, 127, 23, true))) {}
358
359	void	runTest	(void) const;
360};
361
362void TestBinary32::runTest (void) const
363{
364	testULP(p(0),				p(-24));
365	testULP(p(0) + p(-23),		p(-23));
366	testULP(p(-124),			p(-148));
367	testULP(p(-125),			p(-149));
368	testULP(p(-125) + p(-140),	p(-148));
369	testULP(p(-126),			p(-149));
370	testULP(p(-130),			p(-149));
371
372	testRound(p(0) + p(-20) + p(-40),	p(0) + p(-20),		p(0) + p(-20) + p(-23));
373	testRound(p(-126) - p(-150),		p(-126) - p(-149),	p(-126));
374
375	TCU_CHECK(m_fmt->floatToHex(p(0)) == "0x1.000000p0");
376	TCU_CHECK(m_fmt->floatToHex(p(8) + p(-4)) == "0x1.001000p8");
377	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
378	TCU_CHECK(m_fmt->floatToHex(p(-140)) == "0x0.000400p-126");
379	TCU_CHECK(m_fmt->floatToHex(p(-126) + p(-125)) == "0x1.800000p-125");
380}
381
382} // anonymous
383
384void FloatFormat_selfTest (void)
385{
386	TestBinary32	test32;
387	test32.runTest();
388}
389
390} // tcu
391