1/* libs/pixelflinger/fixed.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19
20#include <private/pixelflinger/ggl_context.h>
21#include <private/pixelflinger/ggl_fixed.h>
22
23
24// ------------------------------------------------------------------------
25
26int32_t gglRecipQNormalized(int32_t x, int* exponent)
27{
28    const int32_t s = x>>31;
29    uint32_t a = s ? -x : x;
30
31    // the result will overflow, so just set it to the biggest/inf value
32    if (ggl_unlikely(a <= 2LU)) {
33        *exponent = 0;
34        return s ? FIXED_MIN : FIXED_MAX;
35    }
36
37    // Newton-Raphson iteration:
38    // x = r*(2 - a*r)
39
40    const int32_t lz = gglClz(a);
41    a <<= lz;  // 0.32
42    uint32_t r = a;
43    // note: if a == 0x80000000, this means x was a power-of-2, in this
44    // case we don't need to compute anything. We get the reciprocal for
45    // (almost) free.
46    if (a != 0x80000000) {
47        r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
48        // 0.32 + 2.30 = 2.62 -> 2.30
49        // 2.30 + 2.30 = 4.60 -> 2.30
50        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
51        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
52    }
53
54    // shift right 1-bit to make room for the sign bit
55    *exponent = 30-lz-1;
56    r >>= 1;
57    return s ? -r : r;
58}
59
60int32_t gglRecipQ(GGLfixed x, int q)
61{
62    int shift;
63    x = gglRecipQNormalized(x, &shift);
64    shift += 16-q;
65    if (shift > 0)
66        x += 1L << (shift-1);   // rounding
67    x >>= shift;
68    return x;
69}
70
71// ------------------------------------------------------------------------
72
73GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
74{
75    if ((d>>24) && ((d>>24)+1)) {
76        n >>= 8;
77        d >>= 8;
78    }
79    return gglMulx(n, gglRecip(d));
80}
81
82// ------------------------------------------------------------------------
83
84static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
85    // 1/sqrt(x) with x = 1-N/16, N=[8...1]
86    0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
87};
88
89GGLfixed gglSqrtRecipx(GGLfixed x)
90{
91    if (x == 0)         return FIXED_MAX;
92    if (x == FIXED_ONE) return x;
93    const GGLfixed a = x;
94    const int32_t lz = gglClz(x);
95    x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
96    const int32_t exp = lz - 16;
97    if (exp <= 0)   x >>= -exp>>1;
98    else            x <<= (exp>>1) + (exp & 1);
99    if (exp & 1) {
100        x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
101    }
102    // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
103    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
104    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
105    return x;
106}
107
108GGLfixed gglSqrtx(GGLfixed a)
109{
110    // Compute a full precision square-root (24 bits accuracy)
111    GGLfixed r = 0;
112    GGLfixed bit = 0x800000;
113    int32_t bshift = 15;
114    do {
115        GGLfixed temp = bit + (r<<1);
116        if (bshift >= 8)    temp <<= (bshift-8);
117        else                temp >>= (8-bshift);
118        if (a >= temp) {
119            r += bit;
120            a -= temp;
121        }
122        bshift--;
123    } while (bit>>=1);
124    return r;
125}
126
127// ------------------------------------------------------------------------
128
129static const GGLfixed ggl_log_approx_tab[] = {
130    // -ln(x)/ln(2) with x = N/16, N=[8...16]
131    0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
132};
133
134static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
135	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
136};
137
138GGLfixed gglPowx(GGLfixed x, GGLfixed y)
139{
140    // prerequisite: 0 <= x <= 1, and y >=0
141
142    // pow(x,y) = 2^(y*log2(x))
143    // =  2^(y*log2(x*(2^exp)*(2^-exp))))
144    // =  2^(y*(log2(X)-exp))
145    // =  2^(log2(X)*y - y*exp)
146    // =  2^( - (-log2(X)*y + y*exp) )
147
148    int32_t exp = gglClz(x) - 16;
149    GGLfixed f = x << exp;
150    x = (f & 0x0FFF)<<4;
151    f = (f >> 12) & 0x7;
152    GGLfixed p = gglMulAddx(
153            ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
154            ggl_log_approx_tab[f]);
155    p = gglMulAddx(p, y, y*exp);
156    exp = gglFixedToIntFloor(p);
157    if (exp < 31) {
158        p = gglFracx(p);
159        x = (p & 0x1FFF)<<3;
160        p >>= 13;
161        p = gglMulAddx(
162                ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
163                ggl_alog_approx_tab[p]);
164        p >>= exp;
165    } else {
166        p = 0;
167    }
168    return p;
169        // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
170}
171
172// ------------------------------------------------------------------------
173
174int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
175{
176    //int32_t r =int32_t((int64_t(n)<<i)/d);
177    const int32_t ds = n^d;
178    if (n<0) n = -n;
179    if (d<0) d = -d;
180    int nd = gglClz(d) - gglClz(n);
181    i += nd + 1;
182    if (nd > 0) d <<= nd;
183    else        n <<= -nd;
184    uint32_t q = 0;
185
186    int j = i & 7;
187    i >>= 3;
188
189    // gcc deals with the code below pretty well.
190    // we get 3.75 cycles per bit in the main loop
191    // and 8 cycles per bit in the termination loop
192    if (ggl_likely(i)) {
193        n -= d;
194        do {
195            q <<= 8;
196            if (n>=0)   q |= 128;
197            else        n += d;
198            n = n*2 - d;
199            if (n>=0)   q |= 64;
200            else        n += d;
201            n = n*2 - d;
202            if (n>=0)   q |= 32;
203            else        n += d;
204            n = n*2 - d;
205            if (n>=0)   q |= 16;
206            else        n += d;
207            n = n*2 - d;
208            if (n>=0)   q |= 8;
209            else        n += d;
210            n = n*2 - d;
211            if (n>=0)   q |= 4;
212            else        n += d;
213            n = n*2 - d;
214            if (n>=0)   q |= 2;
215            else        n += d;
216            n = n*2 - d;
217            if (n>=0)   q |= 1;
218            else        n += d;
219
220            if (--i == 0)
221                goto finish;
222
223            n = n*2 - d;
224        } while(true);
225        do {
226            q <<= 1;
227            n = n*2 - d;
228            if (n>=0)   q |= 1;
229            else        n += d;
230        finish: ;
231        } while (j--);
232        return (ds<0) ? -q : q;
233    }
234
235    n -= d;
236    if (n>=0)   q |= 1;
237    else        n += d;
238    j--;
239    goto finish;
240}
241
242// ------------------------------------------------------------------------
243
244// assumes that the int32_t values of a, b, and c are all positive
245// use when both a and b are larger than c
246
247template <typename T>
248static inline void swap(T& a, T& b) {
249    T t(a);
250    a = b;
251    b = t;
252}
253
254static __attribute__((noinline))
255int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
256{
257	// first we compute a*b as a 64-bit integer
258    // (GCC generates umull with the code below)
259    uint64_t ab = uint64_t(a)*b;
260    uint32_t hi = ab>>32;
261    uint32_t lo = ab;
262    uint32_t result;
263
264	// now perform the division
265	if (hi >= c) {
266	overflow:
267		result = 0x7fffffff;  // basic overflow
268	} else if (hi == 0) {
269		result = lo/c;  // note: c can't be 0
270		if ((result >> 31) != 0)  // result must fit in 31 bits
271			goto overflow;
272	} else {
273		uint32_t r = hi;
274		int bits = 31;
275	    result = 0;
276		do {
277			r = (r << 1) | (lo >> 31);
278			lo <<= 1;
279			result <<= 1;
280			if (r >= c) {
281				r -= c;
282				result |= 1;
283			}
284		} while (bits--);
285	}
286	return int32_t(result);
287}
288
289// assumes a >= 0 and c >= b >= 0
290static inline
291int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
292{
293    int32_t r = 0, q = 0, i;
294    int leading = gglClz(a);
295    i = 32 - leading;
296    a <<= leading;
297    do {
298        r <<= 1;
299        if (a < 0)
300            r += b;
301        a <<= 1;
302        q <<= 1;
303        if (r >= c) {
304            r -= c;
305            q++;
306        }
307        asm(""::); // gcc generates better code this way
308        if (r >= c) {
309            r -= c;
310            q++;
311        }
312    }
313    while (--i);
314    return q;
315}
316
317// this function computes a*b/c with 64-bit intermediate accuracy
318// overflows (e.g. division by 0) are handled and return INT_MAX
319
320int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
321{
322	int32_t result;
323	int32_t sign = a^b^c;
324
325	if (a < 0) a = -a;
326	if (b < 0) b = -b;
327	if (c < 0) c = -c;
328
329    if (a < b) {
330        swap(a, b);
331    }
332
333	if (b <= c) result = quick_muldiv(a, b, c);
334	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
335
336	if (sign < 0)
337		result = -result;
338
339    return result;
340}
341