/*
 * Copyright (C) 2005 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_GGL_FIXED_H
#define ANDROID_GGL_FIXED_H

#include <math.h>
#include <pixelflinger/pixelflinger.h>

// ----------------------------------------------------------------------------

#define CONST           __attribute__((const))
#define ALWAYS_INLINE   __attribute__((always_inline))

const GGLfixed FIXED_BITS = 16;
const GGLfixed FIXED_EPSILON  = 1;
const GGLfixed FIXED_ONE  = 1L<<FIXED_BITS;
const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
const GGLfixed FIXED_MIN  = 0x80000000L;
const GGLfixed FIXED_MAX  = 0x7FFFFFFFL;
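
// GGLfixed is a signed Q16.16 value: the low FIXED_BITS (16) bits hold the
// fraction, the remaining bits the integer part. Illustrative values:
//   1.0   -> FIXED_ONE  == 0x00010000
//   0.5   -> FIXED_HALF == 0x00008000
//   -2.25 -> 0xFFFDC000
// FIXED_EPSILON is the smallest representable step, 1/65536.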

inline GGLfixed gglIntToFixed(GGLfixed i)       ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntRound(GGLfixed f)  ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntFloor(GGLfixed f)  ALWAYS_INLINE ;
inline GGLfixed gglFixedToIntCeil(GGLfixed f)   ALWAYS_INLINE ;
inline GGLfixed gglFracx(GGLfixed v)            ALWAYS_INLINE ;
inline GGLfixed gglFloorx(GGLfixed v)           ALWAYS_INLINE ;
inline GGLfixed gglCeilx(GGLfixed v)            ALWAYS_INLINE ;
inline GGLfixed gglCenterx(GGLfixed v)          ALWAYS_INLINE ;
inline GGLfixed gglRoundx(GGLfixed v)           ALWAYS_INLINE ;

GGLfixed gglIntToFixed(GGLfixed i) {
    return i<<FIXED_BITS;
}
GGLfixed gglFixedToIntRound(GGLfixed f) {
    return (f + FIXED_HALF)>>FIXED_BITS;
}
GGLfixed gglFixedToIntFloor(GGLfixed f) {
    return f>>FIXED_BITS;
}
GGLfixed gglFixedToIntCeil(GGLfixed f) {
    return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
}

GGLfixed gglFracx(GGLfixed v) {
    return v & ((1<<FIXED_BITS)-1);
}
GGLfixed gglFloorx(GGLfixed v) {
    return gglFixedToIntFloor(v)<<FIXED_BITS;
}
GGLfixed gglCeilx(GGLfixed v) {
    return gglFixedToIntCeil(v)<<FIXED_BITS;
}
GGLfixed gglCenterx(GGLfixed v) {
    return gglFloorx(v + FIXED_HALF) | FIXED_HALF;
}
GGLfixed gglRoundx(GGLfixed v) {
    return gglFixedToIntRound(v)<<FIXED_BITS;
}
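
// Worked examples (Q16, illustrative): with v == 0x00018000 (1.5),
//   gglFixedToIntRound(v) == 2, gglFixedToIntFloor(v) == 1,
//   gglFixedToIntCeil(v)  == 2, gglFracx(v) == 0x8000 (0.5),
//   gglFloorx(v) == FIXED_ONE and gglRoundx(v) == 2*FIXED_ONE.
// gglCenterx() snaps v to the nearest x.5 value,
//   e.g. gglCenterx(0x00018000) == 0x00028000 (2.5).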

// conversion from (unsigned) int, short, byte to fixed...
#define GGL_B_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
#define GGL_S_TO_X(_x)      GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
#define GGL_I_TO_X(_x)      GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
#define GGL_UB_TO_X(_x)     GGLfixed(   uint32_t(_x) +      \
                                        (uint32_t(_x)<<8) + \
                                        (uint32_t(_x)>>7) )
#define GGL_US_TO_X(_x)     GGLfixed( (_x) + ((_x)>>15) )
#define GGL_UI_TO_X(_x)     GGLfixed( (((_x)>>1)+1)>>15 )
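
// The unsigned conversions scale the full input range onto [0, FIXED_ONE]:
//   GGL_UB_TO_X(255)    == 255 + (255<<8) + (255>>7) == 65536 == FIXED_ONE
//   GGL_US_TO_X(0xFFFF) == 65535 + 1                 == FIXED_ONE
// i.e. 0xFF and 0xFFFF map exactly to 1.0 rather than to 255/256 or 65535/65536.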

// ----------------------------------------------------------------------------

GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
GGLfixed gglSqrtx(GGLfixed a) CONST;
GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);

int32_t gglRecipQNormalized(int32_t x, int* exponent);
int32_t gglRecipQ(GGLfixed x, int q) CONST;

inline GGLfixed gglRecip(GGLfixed x) CONST;
inline GGLfixed gglRecip(GGLfixed x) {
    return gglRecipQ(x, 16);
}

inline GGLfixed gglRecip28(GGLfixed x) CONST;
inline GGLfixed gglRecip28(GGLfixed x) {
    return gglRecipQ(x, 28);
}

// ----------------------------------------------------------------------------

#if defined(__arm__) && !defined(__thumb__)

// inline ARM implementations
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "movs   %[lo], %[lo], lsr %[rshift]         \n"
        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
        : "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
        : "cc"
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "movs   %[lo], %[lo], lsr %[rshift]         \n"
        "adc    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
        : "cc"
        );
    }
    return result;
}
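
// Note: the SMULL/MOVS/ADC sequence above computes the full 64-bit product,
// shifts it right by 'shift', and folds in the carry left behind by
// "movs ... lsr" (the last bit shifted out), so the result is rounded to
// nearest rather than truncated.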

inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "add    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
        );
    }
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
    GGLfixed result, t;
    if (__builtin_constant_p(shift)) {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
        );
    } else {
    asm("smull  %[lo], %[hi], %[x], %[y]            \n"
        "rsb    %[lo], %[a],  %[lo], lsr %[rshift]  \n"
        "add    %[lo], %[lo], %[hi], lsl %[lshift]  \n"
        : [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
        : "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
        );
    }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y)
{
    // 64-bit result: r0=low, r1=high
    union {
        struct {
            int32_t lo;
            int32_t hi;
        } s;
        int64_t res;
    };
    asm("smull %0, %1, %2, %3   \n"
        : "=r"(s.lo), "=&r"(s.hi)
        : "%r"(x), "r"(y)
        :
        );
    return res;
}
#elif defined(__mips__) && __mips_isa_rev < 6

/*inline MIPS implementations*/
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    GGLfixed result,tmp,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mult %[a], %[b] \t\n"
              "mflo  %[res]   \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp)
            : [a]"r"(a),[b]"r"(b)
            : "%hi","%lo"
            );
        } else if (shift == 32)
        {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1\t\n"
            "sll  %[tmp],%[tmp],0x1f\t\n"
            "mflo %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
            "sra %[tmp],%[tmp],0x1f \t\n"
            "mfhi  %[res]   \t\n"
            "addu %[res],%[res],%[tmp]\t\n"
            "addu %[res],%[res],%[tmp1]\t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
            : "%hi","%lo"
            );
        } else if ((shift >0) && (shift < 32))
        {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "mflo  %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "addu  %[res],%[res],%[tmp] \t\n"
            "mfhi  %[tmp]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
            "sll   %[tmp],%[tmp],%[lshift] \t\n"
            "srl   %[res],%[res],%[rshift]    \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
            : "%hi","%lo"
            );
        } else {
            asm ("mult %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "mflo  %[res]   \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "sra  %[tmp2],%[tmp],0x1f \t\n"
            "addu  %[res],%[res],%[tmp] \t\n"
            "mfhi  %[tmp]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
            "srl   %[tmp2],%[res],%[rshift]    \t\n"
            "srav  %[res], %[tmp],%[rshift]\t\n"
            "sll   %[tmp],%[tmp],1 \t\n"
            "sll   %[tmp],%[tmp],%[norbits] \t\n"
            "or    %[tmp],%[tmp],%[tmp2] \t\n"
            "movz  %[res],%[tmp],%[bit5] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
            : "%hi","%lo"
            );
        }
    } else {
        asm ("mult %[a], %[b] \t\n"
        "li  %[tmp],1 \t\n"
        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
        "mflo  %[res]   \t\n"
        "addu %[tmp1],%[tmp],%[res] \t\n"
        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
        "sra  %[tmp2],%[tmp],0x1f \t\n"
        "addu  %[res],%[res],%[tmp] \t\n"
        "mfhi  %[tmp]   \t\n"
        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
        "srl   %[tmp2],%[res],%[rshift]    \t\n"
        "srav  %[res], %[tmp],%[rshift]\t\n"
        "sll   %[tmp],%[tmp],1 \t\n"
        "sll   %[tmp],%[tmp],%[norbits] \t\n"
        "or    %[tmp],%[tmp],%[tmp2] \t\n"
        "movz  %[res],%[tmp],%[bit5] \t\n"
         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
         : "%hi","%lo"
         );
    }

    return result;
}

inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mult %[a], %[b] \t\n"
                 "mflo  %[lo]   \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
        } else if (shift == 32) {
            asm ("mult %[a], %[b] \t\n"
                 "mfhi  %[lo]   \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
        } else if ((shift>0) && (shift<32)) {
            asm ("mult %[a], %[b] \t\n"
                 "mflo  %[res]   \t\n"
                 "mfhi  %[t]   \t\n"
                 "srl   %[res],%[res],%[rshift]    \t\n"
                 "sll   %[t],%[t],%[lshift]     \t\n"
                 "or  %[res],%[res],%[t]    \t\n"
                 "addu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 : "%hi","%lo"
                 );
        } else {
            asm ("mult %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "mflo  %[res]   \t\n"
                 "mfhi  %[t]   \t\n"
                 "srl   %[res],%[res],%[shift]    \t\n"
                 "sll   %[tmp2],%[t],1     \t\n"
                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
                 "srav  %[res],%[t],%[shift]     \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "movz %[res],%[tmp1],%[tmp2]\t\n"
                 "addu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 : "%hi","%lo"
                 );
        }
    } else {
        asm ("mult %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "mflo  %[res]   \t\n"
             "mfhi  %[t]   \t\n"
             "srl   %[res],%[res],%[shift]    \t\n"
             "sll   %[tmp2],%[t],1     \t\n"
             "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
             "or  %[tmp1],%[tmp2],%[res]    \t\n"
             "srav  %[res],%[t],%[shift]     \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "movz %[res],%[tmp1],%[tmp2]\t\n"
             "addu  %[res],%[res],%[c]    \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             : "%hi","%lo"
             );
    }
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mult %[a], %[b] \t\n"
                 "mflo  %[lo]   \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
        } else if (shift == 32) {
            asm ("mult %[a], %[b] \t\n"
                 "mfhi  %[lo]   \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 : "%hi","%lo"
                 );
        } else if ((shift>0) && (shift<32)) {
            asm ("mult %[a], %[b] \t\n"
                 "mflo  %[res]   \t\n"
                 "mfhi  %[t]   \t\n"
                 "srl   %[res],%[res],%[rshift]    \t\n"
                 "sll   %[t],%[t],%[lshift]     \t\n"
                 "or  %[res],%[res],%[t]    \t\n"
                 "subu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 : "%hi","%lo"
                 );
        } else {
            asm ("mult %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "mflo  %[res]   \t\n"
                 "mfhi  %[t]   \t\n"
                 "srl   %[res],%[res],%[shift]    \t\n"
                 "sll   %[tmp2],%[t],1     \t\n"
                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
                 "srav  %[res],%[t],%[shift]     \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "movz %[res],%[tmp1],%[tmp2]\t\n"
                 "subu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 : "%hi","%lo"
                 );
        }
    } else {
        asm ("mult %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "mflo  %[res]   \t\n"
             "mfhi  %[t]   \t\n"
             "srl   %[res],%[res],%[shift]    \t\n"
             "sll   %[tmp2],%[t],1     \t\n"
             "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
             "or  %[tmp1],%[tmp2],%[res]    \t\n"
             "srav  %[res],%[t],%[shift]     \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "movz %[res],%[tmp1],%[tmp2]\t\n"
             "subu  %[res],%[res],%[c]    \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             : "%hi","%lo"
             );
    }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y) {
    union {
        struct {
#if defined(__MIPSEL__)
            int32_t lo;
            int32_t hi;
#elif defined(__MIPSEB__)
            int32_t hi;
            int32_t lo;
#endif
        } s;
        int64_t res;
    }u;
    asm("mult %2, %3 \t\n"
        "mfhi %1   \t\n"
        "mflo %0   \t\n"
        : "=r"(u.s.lo), "=&r"(u.s.hi)
        : "%r"(x), "r"(y)
        : "%hi","%lo"
        );
    return u.res;
}

#elif defined(__aarch64__)

// inline AArch64 implementations

inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift)
{
    GGLfixed result;
    GGLfixed round;

    asm("mov    %x[round], #1                        \n"
        "lsl    %x[round], %x[round], %x[shift]      \n"
        "lsr    %x[round], %x[round], #1             \n"
        "smaddl %x[result], %w[x], %w[y],%x[round]   \n"
        "lsr    %x[result], %x[result], %x[shift]    \n"
        : [round]"=&r"(round), [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [shift] "r"(shift)
        :
        );
    return result;
}
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;
    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "add    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift)
        :
        );
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift)
{
    GGLfixed result;

    asm("smull  %x[result], %w[x], %w[y]                     \n"
        "lsr    %x[result], %x[result], %x[shift]            \n"
        "sub    %w[result], %w[result], %w[a]                \n"
        : [result]"=&r"(result)
        : [x]"r"(x), [y]"r"(y), [a]"r"(a), [shift] "r"(shift)
        :
        );
    return result;
}
inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y)
{
    int64_t res;
    asm("smull  %x0, %w1, %w2 \n"
        : "=r"(res)
        : "%r"(x), "r"(y)
        :
        );
    return res;
}

#elif defined(__mips__) && __mips_isa_rev == 6

/*inline MIPS implementations*/
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    GGLfixed result,tmp,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mul %[res], %[a], %[b] \t\n"
            : [res]"=&r"(result)
            : [a]"r"(a),[b]"r"(b)
            );
        } else if (shift == 32)
        {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1\t\n"
            "sll  %[tmp],%[tmp],0x1f\t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "muh %[res], %[a], %[b] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
            "sra %[tmp],%[tmp],0x1f \t\n"
            "addu %[res],%[res],%[tmp]\t\n"
            "addu %[res],%[res],%[tmp1]\t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
            );
        } else if ((shift >0) && (shift < 32))
        {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "addu  %[res],%[res],%[tmp] \t\n"
            "muh %[tmp], %[a], %[b] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
            "sll   %[tmp],%[tmp],%[lshift] \t\n"
            "srl   %[res],%[res],%[rshift]    \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
            );
        } else {
            asm ("mul %[res], %[a], %[b] \t\n"
            "li  %[tmp],1 \t\n"
            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
            "addu %[tmp1],%[tmp],%[res] \t\n"
            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
            "sra  %[tmp2],%[tmp],0x1f \t\n"
            "addu  %[res],%[res],%[tmp] \t\n"
            "muh  %[tmp], %[a], %[b]   \t\n"
            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
            "srl   %[tmp2],%[res],%[rshift]    \t\n"
            "srav  %[res], %[tmp],%[rshift]\t\n"
            "sll   %[tmp],%[tmp],1 \t\n"
            "sll   %[tmp],%[tmp],%[norbits] \t\n"
            "or    %[tmp],%[tmp],%[tmp2] \t\n"
            "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
            "selnez  %[res],%[res],%[bit5] \t\n"
            "or    %[res],%[res],%[tmp] \t\n"
            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
            );
        }
    } else {
        asm ("mul %[res], %[a], %[b] \t\n"
        "li  %[tmp],1 \t\n"
        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
        "addu %[tmp1],%[tmp],%[res] \t\n"
        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
        "sra  %[tmp2],%[tmp],0x1f \t\n"
        "addu  %[res],%[res],%[tmp] \t\n"
        "muh  %[tmp], %[a], %[b] \t\n"
        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
        "srl   %[tmp2],%[res],%[rshift]    \t\n"
        "srav  %[res], %[tmp],%[rshift]\t\n"
        "sll   %[tmp],%[tmp],1 \t\n"
        "sll   %[tmp],%[tmp],%[norbits] \t\n"
        "or    %[tmp],%[tmp],%[tmp2] \t\n"
        "seleqz  %[tmp],%[tmp],%[bit5] \t\n"
        "selnez  %[res],%[res],%[bit5] \t\n"
        "or    %[res],%[res],%[tmp] \t\n"
         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
         );
    }
    return result;
}

inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mul %[lo], %[a], %[b] \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if (shift == 32) {
            asm ("muh %[lo], %[a], %[b] \t\n"
                 "addu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if ((shift>0) && (shift<32)) {
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh  %[t], %[a], %[b] \t\n"
                 "srl   %[res],%[res],%[rshift]    \t\n"
                 "sll   %[t],%[t],%[lshift]     \t\n"
                 "or  %[res],%[res],%[t]    \t\n"
                 "addu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 );
        } else {
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "srl   %[res],%[res],%[shift]    \t\n"
                 "sll   %[tmp2],%[t],1     \t\n"
                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
                 "srav  %[res],%[t],%[shift]     \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                 "selnez %[res],%[res],%[tmp2]\t\n"
                 "or %[res],%[res],%[tmp1]\t\n"
                 "addu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 );
        }
    } else {
        asm ("mul %[res], %[a], %[b] \t\n"
             "muh %[t], %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "srl   %[res],%[res],%[shift]    \t\n"
             "sll   %[tmp2],%[t],1     \t\n"
             "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
             "or  %[tmp1],%[tmp2],%[res]    \t\n"
             "srav  %[res],%[t],%[shift]     \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
             "selnez %[res],%[res],%[tmp2]\t\n"
             "or %[res],%[res],%[tmp1]\t\n"
             "addu  %[res],%[res],%[c]    \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             );
    }
    return result;
}

inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    GGLfixed result,t,tmp1,tmp2;

    if (__builtin_constant_p(shift)) {
        if (shift == 0) {
            asm ("mul %[lo], %[a], %[b] \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if (shift == 32) {
            asm ("muh %[lo], %[a], %[b] \t\n"
                 "subu  %[lo],%[lo],%[c]    \t\n"
                 : [lo]"=&r"(result)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
                 );
        } else if ((shift>0) && (shift<32)) {
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "srl   %[res],%[res],%[rshift]    \t\n"
                 "sll   %[t],%[t],%[lshift]     \t\n"
                 "or  %[res],%[res],%[t]    \t\n"
                 "subu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
                 );
        } else {
            asm ("mul %[res], %[a], %[b] \t\n"
                 "muh %[t], %[a], %[b] \t\n"
                 "nor %[tmp1],$zero,%[shift]\t\n"
                 "srl   %[res],%[res],%[shift]    \t\n"
                 "sll   %[tmp2],%[t],1     \t\n"
                 "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
                 "or  %[tmp1],%[tmp2],%[res]    \t\n"
                 "srav  %[res],%[t],%[shift]     \t\n"
                 "andi %[tmp2],%[shift],0x20\t\n"
                 "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
                 "selnez %[res],%[res],%[tmp2]\t\n"
                 "or %[res],%[res],%[tmp1]\t\n"
                 "subu  %[res],%[res],%[c]    \t\n"
                 : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
                 : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
                 );
        }
    } else {
        asm ("mul %[res], %[a], %[b] \t\n"
             "muh %[t], %[a], %[b] \t\n"
             "nor %[tmp1],$zero,%[shift]\t\n"
             "srl   %[res],%[res],%[shift]    \t\n"
             "sll   %[tmp2],%[t],1     \t\n"
             "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
             "or  %[tmp1],%[tmp2],%[res]    \t\n"
             "srav  %[res],%[t],%[shift]     \t\n"
             "andi %[tmp2],%[shift],0x20\t\n"
             "seleqz %[tmp1],%[tmp1],%[tmp2]\t\n"
             "selnez %[res],%[res],%[tmp2]\t\n"
             "or %[res],%[res],%[tmp1]\t\n"
             "subu  %[res],%[res],%[c]    \t\n"
             : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
             : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
             );
    }
    return result;
}

inline int64_t gglMulii(int32_t x, int32_t y) CONST;
inline int64_t gglMulii(int32_t x, int32_t y) {
    union {
        struct {
#if defined(__MIPSEL__)
            int32_t lo;
            int32_t hi;
#elif defined(__MIPSEB__)
            int32_t hi;
            int32_t lo;
#endif
        } s;
        int64_t res;
    }u;
    asm("mul %0, %2, %3 \t\n"
        "muh %1, %2, %3 \t\n"
        : "=r"(u.s.lo), "=&r"(u.s.hi)
        : "%r"(x), "r"(y)
        );
    return u.res;
}

#else // ----------------------------------------------------------------------

inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
    return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
}
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    return GGLfixed((int64_t(a)*b)>>shift) + c;
}
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
    return GGLfixed((int64_t(a)*b)>>shift) - c;
}
inline int64_t gglMulii(int32_t a, int32_t b) CONST;
inline int64_t gglMulii(int32_t a, int32_t b) {
    return int64_t(a)*b;
}
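
// Portable fallback notes: gglMulx() rounds the 64-bit product to nearest
// (it adds 1<<(shift-1) before shifting), while gglMulAddx()/gglMulSubx()
// truncate. Worked example (Q16): gglMulx(0x18000, 0x20000, 16) computes
// 1.5 * 2.0 and yields 0x30000 (3.0).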

#endif

// ------------------------------------------------------------------------

inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
    return gglMulx(a, b, 16);
}
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
    return gglMulAddx(a, b, c, 16);
}
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
    return gglMulSubx(a, b, c, 16);
}
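
// These overloads fix the shift at 16, i.e. they operate on plain Q16.16
// values. For example, gglMulx(FIXED_ONE, v) == v and
// gglMulx(FIXED_HALF, FIXED_HALF) == 0x4000 (0.25).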

// ------------------------------------------------------------------------

inline int32_t gglClz(int32_t x) CONST;
inline int32_t gglClz(int32_t x)
{
#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__) || defined(__aarch64__)
    return __builtin_clz(x);
#else
    if (!x) return 32;
    int32_t exp = 31;
    if (x & 0xFFFF0000) { exp -= 16; x >>= 16; }
    if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
    if (x & 0x000000f0) { exp -= 4; x >>= 4; }
    if (x & 0x0000000c) { exp -= 2; x >>= 2; }
    if (x & 0x00000002) { exp -= 1; }
    return exp;
#endif
}
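
// e.g. gglClz(1) == 31 and gglClz(FIXED_ONE) == 15. Note that the builtin
// path inherits __builtin_clz()'s undefined result for x == 0, whereas the
// fallback above explicitly returns 32 in that case.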

// ------------------------------------------------------------------------

int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;

inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
    return gglDivQ(n, d, 16);
}

inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
    return gglDivQ(n, d, 16);
}

// ------------------------------------------------------------------------

inline GGLfixed gglRecipFast(GGLfixed x) CONST;
inline GGLfixed gglRecipFast(GGLfixed x)
{
    // This is a really bad approximation of 1/x, but it's also
    // very fast. x must be strictly positive.
    // If x is in [0.5, 1[, then 1/x is approximated by 3-2*x
    // (we use 2.30 fixed-point).
    const int32_t lz = gglClz(x);
    return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
}
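
// Worked example: for x == FIXED_HALF (0.5 in Q16), lz == 16, so the result
// is (0xC0000000 - 0x40000000) >> 14 == 0x20000, i.e. 2.0. The approximation
// is exact at the ends of each [0.5, 1) octave and least accurate near the
// middle.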

// ------------------------------------------------------------------------

inline GGLfixed gglClampx(GGLfixed c) CONST;
inline GGLfixed gglClampx(GGLfixed c)
{
#if defined(__thumb__)
    // clamp without branches
    c &= ~(c>>31);  c = FIXED_ONE - c;
    c &= ~(c>>31);  c = FIXED_ONE - c;
#else
#if defined(__arm__)
    // I don't know why gcc thinks it's smarter than me! The code below
    // clamps to zero in one instruction, but gcc won't generate it and
    // replaces it with a cmp + movlt (which is quite amazing, actually).
    asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
#elif defined(__aarch64__)
    asm("bic %w0, %w1, %w1, asr #31\n" : "=r"(c) : "r"(c));
#else
    c &= ~(c>>31);
#endif
    if (c>FIXED_ONE)
        c = FIXED_ONE;
#endif
    return c;
}
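
// gglClampx() clamps a Q16 value to [0, FIXED_ONE], e.g.
// gglClampx(-FIXED_HALF) == 0 and gglClampx(FIXED_ONE + 1) == FIXED_ONE.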

// ------------------------------------------------------------------------

#endif // ANDROID_GGL_FIXED_H
