1/*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 32-bit integer math.
22 *//*--------------------------------------------------------------------*/
23
24#include "deInt32.h"
25
26DE_BEGIN_EXTERN_C
27
28const deInt8 g_clzLUT[256] =
29{
30	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
31	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
46};
47
48/*--------------------------------------------------------------------*//*!
49 * \brief Compute the reciprocal of an integer.
50 * \param a		Input value.
51 * \param rcp	Pointer to resulting reciprocal "mantissa" value.
52 * \param exp	Pointer to resulting exponent value.
53 *
54 * The returned value is split into an exponent part and a mantissa part for
55 * practical reasons. The actual reciprocal can be computed with the formula:
56 * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
57 *//*--------------------------------------------------------------------*/
58void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
59{
60	enum { RCP_LUT_BITS = 8 };
61	static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
62	{
63		0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
64		0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
65		0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
66		0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
67		0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
68		0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
69		0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
70		0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
71		0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
72		0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
73		0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
74		0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
75		0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
76		0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
77		0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
78		0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
79		0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
80		0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
81		0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
82		0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
83		0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
84		0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
85		0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
86		0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
87		0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
88		0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
89		0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
90		0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
91		0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
92		0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
93		0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
94		0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
95		0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
96		0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
97		0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
98		0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
99		0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
100		0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
101		0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
102		0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
103		0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
104		0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
105		0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
106		0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
107		0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
108		0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
109		0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
110		0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
111		0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
112		0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
113		0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
114		0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
115		0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
116		0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
117		0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
118		0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
119		0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
120		0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
121		0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
122		0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
123		0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
124		0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
125		0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
126		0x20408102, 0x2030486c, 0x20202020, 0x20100804
127	};
128
129	DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
130
131	int			shift		= deClz32(a);
132	deUint32	normalized	= (deUint32)a << shift; /* Highest bit is always 1. */
133	int			lookupNdx	= (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
134	deUint32	result;
135	deUint32	tmp;
136
137	/* Must be non-negative. */
138	DE_ASSERT(a > 0);
139
140	/* First approximation from lookup table. */
141	DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
142	result = s_rcpLUT[lookupNdx];
143
144	/* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
145	tmp		= deSafeMuluAsr32(result, normalized, 31);
146	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
147
148	/* Second Newton-Raphson iteration (now has full accuracy). */
149	tmp		= deSafeMuluAsr32(result, normalized, 31);
150	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
151
152	/* Return results. */
153	*rcp = result;
154	*exp = 31 - shift;
155}
156
157DE_END_EXTERN_C
158