1/*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 32-bit integer math.
22 *//*--------------------------------------------------------------------*/
23
24#include "deInt32.h"
25
26DE_BEGIN_EXTERN_C
27
28const deInt8 g_clzLUT[256] =
29{
30	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
31	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
33	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
34	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
46};
47
48const deInt8 g_ctzLUT[256] =
49{
50	8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
51	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
52	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
53	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
54	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
55	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
56	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
57	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
58	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
59	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
60	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
61	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
62	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
63	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
66};
67
68/*--------------------------------------------------------------------*//*!
69 * \brief Compute the reciprocal of an integer.
70 * \param a		Input value.
71 * \param rcp	Pointer to resulting reciprocal "mantissa" value.
72 * \param exp	Pointer to resulting exponent value.
73 *
74 * The returned value is split into an exponent part and a mantissa part for
75 * practical reasons. The actual reciprocal can be computed with the formula:
76 * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS).
77 *//*--------------------------------------------------------------------*/
78void deRcp32 (deUint32 a, deUint32* rcp, int* exp)
79{
80	enum { RCP_LUT_BITS = 8 };
81	static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] =
82	{
83		0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954,
84		0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88,
85		0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5,
86		0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3,
87		0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9,
88		0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900,
89		0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8,
90		0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5,
91		0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72,
92		0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb,
93		0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7,
94		0x369d0369, 0x366e9095, 0x36406c80, 0x36129663,
95		0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f,
96		0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e,
97		0x34834834, 0x3458550f, 0x342da7f2, 0x34034034,
98		0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4,
99		0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617,
100		0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24,
101		0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00,
102		0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79,
103		0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403,
104		0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2,
105		0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7,
106		0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584,
107		0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db,
108		0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d,
109		0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75,
110		0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a,
111		0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0,
112		0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87,
113		0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2,
114		0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74,
115		0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead,
116		0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5,
117		0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb,
118		0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44,
119		0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5,
120		0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c,
121		0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85,
122		0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2,
123		0x27627627, 0x274a4870, 0x27323858, 0x271a45a6,
124		0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf,
125		0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da,
126		0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055,
127		0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402,
128		0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925,
129		0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471,
130		0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd,
131		0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245,
132		0x243f6f02, 0x242af300, 0x24168e18, 0x24024024,
133		0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace,
134		0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf,
135		0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107,
136		0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1,
137		0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a,
138		0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033,
139		0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5,
140		0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac,
141		0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4,
142		0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766,
143		0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38,
144		0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398,
145		0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8,
146		0x20408102, 0x2030486c, 0x20202020, 0x20100804
147	};
148
149	DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS));
150
151	int			shift		= deClz32(a);
152	deUint32	normalized	= (deUint32)a << shift; /* Highest bit is always 1. */
153	int			lookupNdx	= (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */
154	deUint32	result;
155	deUint32	tmp;
156
157	/* Must be non-negative. */
158	DE_ASSERT(a > 0);
159
160	/* First approximation from lookup table. */
161	DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS));
162	result = s_rcpLUT[lookupNdx];
163
164	/* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */
165	tmp		= deSafeMuluAsr32(result, normalized, 31);
166	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
167
168	/* Second Newton-Raphson iteration (now has full accuracy). */
169	tmp		= deSafeMuluAsr32(result, normalized, 31);
170	result	= deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS);
171
172	/* Return results. */
173	*rcp = result;
174	*exp = 31 - shift;
175}
176
177DE_END_EXTERN_C
178