fixed.cpp revision 4f6e8d7a00cbeda1e70cc15be9c4af1018bdad53
1/* libs/pixelflinger/fixed.cpp
2**
3** Copyright 2006, The Android Open Source Project
4**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9**     http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18#include <stdio.h>
19
20#include <private/pixelflinger/ggl_context.h>
21#include <private/pixelflinger/ggl_fixed.h>
22
23
24// ------------------------------------------------------------------------
25
26int32_t gglRecipQNormalized(int32_t x, int* exponent)
27{
28    const int32_t s = x>>31;
29    uint32_t a = s ? -x : x;
30
31    // the result will overflow, so just set it to the biggest/inf value
32    if (ggl_unlikely(a <= 2LU)) {
33        *exponent = 0;
34        return s ? FIXED_MIN : FIXED_MAX;
35    }
36
37    // Newton-Raphson iteration:
38    // x = r*(2 - a*r)
39
40    const int32_t lz = gglClz(a);
41    a <<= lz;  // 0.32
42    uint32_t r = a;
43    // note: if a == 0x80000000, this means x was a power-of-2, in this
44    // case we don't need to compute anything. We get the reciprocal for
45    // (almost) free.
46    if (a != 0x80000000) {
47        r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
48        // 0.32 + 2.30 = 2.62 -> 2.30
49        // 2.30 + 2.30 = 4.60 -> 2.30
50        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
51        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
52    }
53
54    // shift right 1-bit to make room for the sign bit
55    *exponent = 30-lz-1;
56    r >>= 1;
57    return s ? -r : r;
58}
59
60int32_t gglRecipQ(GGLfixed x, int q)
61{
62    int shift;
63    x = gglRecipQNormalized(x, &shift);
64    shift += 16-q;
65    x += 1L << (shift-1);   // rounding
66    x >>= shift;
67    return x;
68}
69
70// ------------------------------------------------------------------------
71
72GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
73{
74    if ((d>>24) && ((d>>24)+1)) {
75        n >>= 8;
76        d >>= 8;
77    }
78    return gglMulx(n, gglRecip(d));
79}
80
81// ------------------------------------------------------------------------
82
83static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
84    // 1/sqrt(x) with x = 1-N/16, N=[8...1]
85    0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
86};
87
88GGLfixed gglSqrtRecipx(GGLfixed x)
89{
90    if (x == 0)         return FIXED_MAX;
91    if (x == FIXED_ONE) return x;
92    const GGLfixed a = x;
93    const int32_t lz = gglClz(x);
94    x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
95    const int32_t exp = lz - 16;
96    if (exp <= 0)   x >>= -exp>>1;
97    else            x <<= (exp>>1) + (exp & 1);
98    if (exp & 1) {
99        x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
100    }
101    // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
102    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
103    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
104    return x;
105}
106
107GGLfixed gglSqrtx(GGLfixed a)
108{
109    // Compute a full precision square-root (24 bits accuracy)
110    GGLfixed r = 0;
111    GGLfixed bit = 0x800000;
112    int32_t bshift = 15;
113    do {
114        GGLfixed temp = bit + (r<<1);
115        if (bshift >= 8)    temp <<= (bshift-8);
116        else                temp >>= (8-bshift);
117        if (a >= temp) {
118            r += bit;
119            a -= temp;
120        }
121        bshift--;
122    } while (bit>>=1);
123    return r;
124}
125
126// ------------------------------------------------------------------------
127
128static const GGLfixed ggl_log_approx_tab[] = {
129    // -ln(x)/ln(2) with x = N/16, N=[8...16]
130    0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
131};
132
133static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
134	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
135};
136
137GGLfixed gglPowx(GGLfixed x, GGLfixed y)
138{
139    // prerequisite: 0 <= x <= 1, and y >=0
140
141    // pow(x,y) = 2^(y*log2(x))
142    // =  2^(y*log2(x*(2^exp)*(2^-exp))))
143    // =  2^(y*(log2(X)-exp))
144    // =  2^(log2(X)*y - y*exp)
145    // =  2^( - (-log2(X)*y + y*exp) )
146
147    int32_t exp = gglClz(x) - 16;
148    GGLfixed f = x << exp;
149    x = (f & 0x0FFF)<<4;
150    f = (f >> 12) & 0x7;
151    GGLfixed p = gglMulAddx(
152            ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
153            ggl_log_approx_tab[f]);
154    p = gglMulAddx(p, y, y*exp);
155    exp = gglFixedToIntFloor(p);
156    if (exp < 31) {
157        p = gglFracx(p);
158        x = (p & 0x1FFF)<<3;
159        p >>= 13;
160        p = gglMulAddx(
161                ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
162                ggl_alog_approx_tab[p]);
163        p >>= exp;
164    } else {
165        p = 0;
166    }
167    return p;
168        // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
169}
170
171// ------------------------------------------------------------------------
172
173int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
174{
175    //int32_t r =int32_t((int64_t(n)<<i)/d);
176    const int32_t ds = n^d;
177    if (n<0) n = -n;
178    if (d<0) d = -d;
179    int nd = gglClz(d) - gglClz(n);
180    i += nd + 1;
181    if (nd > 0) d <<= nd;
182    else        n <<= -nd;
183    uint32_t q = 0;
184
185    int j = i & 7;
186    i >>= 3;
187
188    // gcc deals with the code below pretty well.
189    // we get 3.75 cycles per bit in the main loop
190    // and 8 cycles per bit in the termination loop
191    if (ggl_likely(i)) {
192        n -= d;
193        do {
194            q <<= 8;
195            if (n>=0)   q |= 128;
196            else        n += d;
197            n = n*2 - d;
198            if (n>=0)   q |= 64;
199            else        n += d;
200            n = n*2 - d;
201            if (n>=0)   q |= 32;
202            else        n += d;
203            n = n*2 - d;
204            if (n>=0)   q |= 16;
205            else        n += d;
206            n = n*2 - d;
207            if (n>=0)   q |= 8;
208            else        n += d;
209            n = n*2 - d;
210            if (n>=0)   q |= 4;
211            else        n += d;
212            n = n*2 - d;
213            if (n>=0)   q |= 2;
214            else        n += d;
215            n = n*2 - d;
216            if (n>=0)   q |= 1;
217            else        n += d;
218
219            if (--i == 0)
220                goto finish;
221
222            n = n*2 - d;
223        } while(true);
224        do {
225            q <<= 1;
226            n = n*2 - d;
227            if (n>=0)   q |= 1;
228            else        n += d;
229        finish: ;
230        } while (j--);
231        return (ds<0) ? -q : q;
232    }
233
234    n -= d;
235    if (n>=0)   q |= 1;
236    else        n += d;
237    j--;
238    goto finish;
239}
240
241// ------------------------------------------------------------------------
242
243// assumes that the int32_t values of a, b, and c are all positive
244// use when both a and b are larger than c
245
246template <typename T>
247static inline void swap(T& a, T& b) {
248    T t(a);
249    a = b;
250    b = t;
251}
252
253static __attribute__((noinline))
254int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
255{
256	// first we compute a*b as a 64-bit integer
257    // (GCC generates umull with the code below)
258    uint64_t ab = uint64_t(a)*b;
259    uint32_t hi = ab>>32;
260    uint32_t lo = ab;
261    uint32_t result;
262
263	// now perform the division
264	if (hi >= c) {
265	overflow:
266		result = 0x7fffffff;  // basic overflow
267	} else if (hi == 0) {
268		result = lo/c;  // note: c can't be 0
269		if ((result >> 31) != 0)  // result must fit in 31 bits
270			goto overflow;
271	} else {
272		uint32_t r = hi;
273		int bits = 31;
274	    result = 0;
275		do {
276			r = (r << 1) | (lo >> 31);
277			lo <<= 1;
278			result <<= 1;
279			if (r >= c) {
280				r -= c;
281				result |= 1;
282			}
283		} while (bits--);
284	}
285	return int32_t(result);
286}
287
288// assumes a >= 0 and c >= b >= 0
289static inline
290int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
291{
292    int32_t r = 0, q = 0, i;
293    int leading = gglClz(a);
294    i = 32 - leading;
295    a <<= leading;
296    do {
297        r <<= 1;
298        if (a < 0)
299            r += b;
300        a <<= 1;
301        q <<= 1;
302        if (r >= c) {
303            r -= c;
304            q++;
305        }
306        asm(""::); // gcc generates better code this way
307        if (r >= c) {
308            r -= c;
309            q++;
310        }
311    }
312    while (--i);
313    return q;
314}
315
316// this function computes a*b/c with 64-bit intermediate accuracy
317// overflows (e.g. division by 0) are handled and return INT_MAX
318
319int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
320{
321	int32_t result;
322	int32_t sign = a^b^c;
323
324	if (a < 0) a = -a;
325	if (b < 0) b = -b;
326	if (c < 0) c = -c;
327
328    if (a < b) {
329        swap(a, b);
330    }
331
332	if (b <= c) result = quick_muldiv(a, b, c);
333	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
334
335	if (sign < 0)
336		result = -result;
337
338    return result;
339}
340