1#include "rs_core.rsh"
2#include "rs_f16_util.h"
3
4extern float2 __attribute__((overloadable)) convert_float2(int2 c);
5extern float3 __attribute__((overloadable)) convert_float3(int3 c);
6extern float4 __attribute__((overloadable)) convert_float4(int4 c);
7
8extern int2 __attribute__((overloadable)) convert_int2(float2 c);
9extern int3 __attribute__((overloadable)) convert_int3(float3 c);
10extern int4 __attribute__((overloadable)) convert_int4(float4 c);
11
12
13extern float __attribute__((overloadable)) fmin(float v, float v2);
14extern float2 __attribute__((overloadable)) fmin(float2 v, float v2);
15extern float3 __attribute__((overloadable)) fmin(float3 v, float v2);
16extern float4 __attribute__((overloadable)) fmin(float4 v, float v2);
17
18extern float __attribute__((overloadable)) fmax(float v, float v2);
19extern float2 __attribute__((overloadable)) fmax(float2 v, float v2);
20extern float3 __attribute__((overloadable)) fmax(float3 v, float v2);
21extern float4 __attribute__((overloadable)) fmax(float4 v, float v2);
22
23// Float ops, 6.11.2
24
// FN_FUNC_FN(fnc): defines the float2, float3 and float4 overloads of fnc.
// Each overload calls fnc on every component of v and stores the result in the
// matching component of the returned vector.
#define FN_FUNC_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v) { \
    float2 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v) { \
    float3 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    out.z = fnc(v.z); \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v) { \
    float4 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    out.z = fnc(v.z); \
    out.w = fnc(v.w); \
    return out; \
}
47
// IN_FUNC_FN(fnc): defines overloads of fnc that take float2/float3/float4 and
// return int2/int3/int4. Each component of the result is fnc applied to the
// matching component of v.
#define IN_FUNC_FN(fnc) \
extern int2 __attribute__((overloadable)) fnc(float2 v) { \
    int2 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    return out; \
} \
extern int3 __attribute__((overloadable)) fnc(float3 v) { \
    int3 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    out.z = fnc(v.z); \
    return out; \
} \
extern int4 __attribute__((overloadable)) fnc(float4 v) { \
    int4 out; \
    out.x = fnc(v.x); \
    out.y = fnc(v.y); \
    out.z = fnc(v.z); \
    out.w = fnc(v.w); \
    return out; \
}
70
// FN_FUNC_FN_FN(fnc): defines the two-operand float2/float3/float4 overloads
// of fnc. Components of v1 and v2 are paired up and passed to fnc one pair at
// a time.
#define FN_FUNC_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
    float2 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
    float3 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    out.z = fnc(v1.z, v2.z); \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
    float4 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    out.z = fnc(v1.z, v2.z); \
    out.w = fnc(v1.w, v2.w); \
    return out; \
}
93
// FN_FUNC_FN_F(fnc): defines float2/float3/float4 overloads of fnc whose second
// operand is a single float. The same v2 is passed along with every component
// of v1.
#define FN_FUNC_FN_F(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \
    float2 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \
    float3 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    out.z = fnc(v1.z, v2); \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \
    float4 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    out.z = fnc(v1.z, v2); \
    out.w = fnc(v1.w, v2); \
    return out; \
}
116
// FN_FUNC_FN_IN(fnc): defines float-vector overloads of fnc whose second
// operand is an int vector of the same width. Components are paired up and
// passed to fnc one pair at a time.
#define FN_FUNC_FN_IN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 v2) { \
    float2 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 v2) { \
    float3 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    out.z = fnc(v1.z, v2.z); \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 v2) { \
    float4 out; \
    out.x = fnc(v1.x, v2.x); \
    out.y = fnc(v1.y, v2.y); \
    out.z = fnc(v1.z, v2.z); \
    out.w = fnc(v1.w, v2.w); \
    return out; \
}
139
// FN_FUNC_FN_I(fnc): defines float-vector overloads of fnc whose second operand
// is a single int. The same v2 is passed along with every component of v1.
#define FN_FUNC_FN_I(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int v2) { \
    float2 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int v2) { \
    float3 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    out.z = fnc(v1.z, v2); \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int v2) { \
    float4 out; \
    out.x = fnc(v1.x, v2); \
    out.y = fnc(v1.y, v2); \
    out.z = fnc(v1.z, v2); \
    out.w = fnc(v1.w, v2); \
    return out; \
}
162
// FN_FUNC_FN_PFN(fnc): defines float-vector overloads of fnc that also write a
// float vector through a pointer. fnc is called per component with the address
// of a scratch float; the scratch values are copied into *v2 only after all of
// the per-component calls have been made.
#define FN_FUNC_FN_PFN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 *v2) { \
    float2 out; \
    float scratch[2]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 *v2) { \
    float3 out; \
    float scratch[3]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    out.z = fnc(v1.z, &scratch[2]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    v2->z = scratch[2]; \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 *v2) { \
    float4 out; \
    float scratch[4]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    out.z = fnc(v1.z, &scratch[2]); \
    out.w = fnc(v1.w, &scratch[3]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    v2->z = scratch[2]; \
    v2->w = scratch[3]; \
    return out; \
}
200
// FN_FUNC_FN_PIN(fnc): defines float-vector overloads of fnc that also write an
// int vector through a pointer. fnc is called per component with the address
// of a scratch int; the scratch values are copied into *v2 only after all of
// the per-component calls have been made.
#define FN_FUNC_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) fnc(float2 v1, int2 *v2) { \
    float2 out; \
    int scratch[2]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    return out; \
} \
extern float3 __attribute__((overloadable)) fnc(float3 v1, int3 *v2) { \
    float3 out; \
    int scratch[3]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    out.z = fnc(v1.z, &scratch[2]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    v2->z = scratch[2]; \
    return out; \
} \
extern float4 __attribute__((overloadable)) fnc(float4 v1, int4 *v2) { \
    float4 out; \
    int scratch[4]; \
    out.x = fnc(v1.x, &scratch[0]); \
    out.y = fnc(v1.y, &scratch[1]); \
    out.z = fnc(v1.z, &scratch[2]); \
    out.w = fnc(v1.w, &scratch[3]); \
    v2->x = scratch[0]; \
    v2->y = scratch[1]; \
    v2->z = scratch[2]; \
    v2->w = scratch[3]; \
    return out; \
}
235
// FN_FUNC_FN_FN_FN(fnc): defines the three-operand float2/float3/float4
// overloads of fnc. Matching components of v1, v2 and v3 are passed to fnc
// together, one triple at a time.
#define FN_FUNC_FN_FN_FN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, float2 v3) { \
    float2 out; \
    out.x = fnc(v1.x, v2.x, v3.x); \
    out.y = fnc(v1.y, v2.y, v3.y); \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, float3 v3) { \
    float3 out; \
    out.x = fnc(v1.x, v2.x, v3.x); \
    out.y = fnc(v1.y, v2.y, v3.y); \
    out.z = fnc(v1.z, v2.z, v3.z); \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, float4 v3) { \
    float4 out; \
    out.x = fnc(v1.x, v2.x, v3.x); \
    out.y = fnc(v1.y, v2.y, v3.y); \
    out.z = fnc(v1.z, v2.z, v3.z); \
    out.w = fnc(v1.w, v2.w, v3.w); \
    return out; \
}
261
// FN_FUNC_FN_FN_PIN(fnc): defines float-vector overloads of fnc that take two
// float vectors and write an int vector through v3. fnc is called per
// component with the address of a scratch int; the scratch values are copied
// into *v3 only after all of the per-component calls have been made.
#define FN_FUNC_FN_FN_PIN(fnc) \
extern float2 __attribute__((overloadable)) \
        fnc(float2 v1, float2 v2, int2 *v3) { \
    float2 out; \
    int scratch[2]; \
    out.x = fnc(v1.x, v2.x, &scratch[0]); \
    out.y = fnc(v1.y, v2.y, &scratch[1]); \
    v3->x = scratch[0]; \
    v3->y = scratch[1]; \
    return out; \
} \
extern float3 __attribute__((overloadable)) \
        fnc(float3 v1, float3 v2, int3 *v3) { \
    float3 out; \
    int scratch[3]; \
    out.x = fnc(v1.x, v2.x, &scratch[0]); \
    out.y = fnc(v1.y, v2.y, &scratch[1]); \
    out.z = fnc(v1.z, v2.z, &scratch[2]); \
    v3->x = scratch[0]; \
    v3->y = scratch[1]; \
    v3->z = scratch[2]; \
    return out; \
} \
extern float4 __attribute__((overloadable)) \
        fnc(float4 v1, float4 v2, int4 *v3) { \
    float4 out; \
    int scratch[4]; \
    out.x = fnc(v1.x, v2.x, &scratch[0]); \
    out.y = fnc(v1.y, v2.y, &scratch[1]); \
    out.z = fnc(v1.z, v2.z, &scratch[2]); \
    out.w = fnc(v1.w, v2.w, &scratch[3]); \
    v3->x = scratch[0]; \
    v3->y = scratch[1]; \
    v3->z = scratch[2]; \
    v3->w = scratch[3]; \
    return out; \
}
299
// IEEE-754 single-precision bit patterns: exponent all ones, mantissa zero,
// with the sign bit clear (iposinf) or set (ineginf).
static const unsigned int iposinf = 0x7f800000;
static const unsigned int ineginf = 0xff800000;

// Returns the float whose bit pattern is iposinf.
//
// The bits are reinterpreted through a union instead of dereferencing a cast
// pointer: *((float*)&iposinf) discarded the const qualifier and read an
// unsigned int object through a float lvalue, which breaks the strict-aliasing
// rule. Like the original, this relies on float and unsigned int having the
// same size.
static float posinf() {
    union {
        unsigned int bits;
        float value;
    } pun;
    pun.bits = iposinf;
    return pun.value;
}
307
// Returns the raw bit pattern of f as an unsigned int.
//
// The value is reinterpreted through a union. The earlier
// *(unsigned int*)(char*)(&f) still read a float object through an unsigned int
// lvalue (the intermediate char* cast does not change that), which violates
// the strict-aliasing rule.
//
// TODO(jeanluc) memcpy(&result, &f, sizeof(f)) is the other aliasing-safe
// option; it was held back by Mac(SDK) build issues.
static unsigned int float_bits(float f) {
    union {
        float value;
        unsigned int bits;
    } pun;
    pun.value = f;
    return pun.bits;
}
317
318static bool isinf(float f) {
319    unsigned int i = float_bits(f);
320    return (i == iposinf) || (i == ineginf);
321}
322
// True when all exponent bits (0x7f800000) of f are set and at least one
// mantissa bit (0x007fffff) is set; the sign bit is ignored.
static bool isnan(float f) {
    const unsigned int bits = float_bits(f);
    const bool exponent_all_ones = (bits & 0x7f800000) == 0x7f800000;
    const bool mantissa_nonzero = (bits & 0x007fffff) != 0;
    return exponent_all_ones && mantissa_nonzero;
}
327
// True only when every bit of f is clear (positive zero).
static bool isposzero(float f) {
    const unsigned int bits = float_bits(f);
    return bits == 0x00000000;
}
331
// True only when the sign bit is the single bit set in f (negative zero).
static bool isnegzero(float f) {
    const unsigned int bits = float_bits(f);
    return bits == 0x80000000;
}
335
// True for either signed zero, as reported by isposzero / isnegzero.
static bool iszero(float f) {
    if (isposzero(f)) {
        return true;
    }
    return isnegzero(f);
}
339
340
341extern float __attribute__((overloadable)) SC_acosf(float);
342float __attribute__((overloadable)) acos(float v) {
343    return SC_acosf(v);
344}
345FN_FUNC_FN(acos)
346
347extern float __attribute__((overloadable)) SC_acoshf(float);
348float __attribute__((overloadable)) acosh(float v) {
349    return SC_acoshf(v);
350}
351FN_FUNC_FN(acosh)
352
353
354extern float __attribute__((overloadable)) acospi(float v) {
355    return acos(v) / M_PI;
356}
357FN_FUNC_FN(acospi)
358
359extern float __attribute__((overloadable)) SC_asinf(float);
360float __attribute__((overloadable)) asin(float v) {
361    return SC_asinf(v);
362}
363FN_FUNC_FN(asin)
364
365extern float __attribute__((overloadable)) SC_asinhf(float);
366float __attribute__((overloadable)) asinh(float v) {
367    return SC_asinhf(v);
368}
369FN_FUNC_FN(asinh)
370
371extern float __attribute__((overloadable)) asinpi(float v) {
372    return asin(v) / M_PI;
373}
374FN_FUNC_FN(asinpi)
375
376extern float __attribute__((overloadable)) SC_atanf(float);
377float __attribute__((overloadable)) atan(float v) {
378    return SC_atanf(v);
379}
380FN_FUNC_FN(atan)
381
382extern float __attribute__((overloadable)) SC_atan2f(float, float);
383float __attribute__((overloadable)) atan2(float v1, float v2) {
384    return SC_atan2f(v1, v2);
385}
386FN_FUNC_FN_FN(atan2)
387
388extern float __attribute__((overloadable)) SC_atanhf(float);
389float __attribute__((overloadable)) atanh(float v) {
390    return SC_atanhf(v);
391}
392FN_FUNC_FN(atanh)
393
394extern float __attribute__((overloadable)) atanpi(float v) {
395    return atan(v) / M_PI;
396}
397FN_FUNC_FN(atanpi)
398
399
400extern float __attribute__((overloadable)) atan2pi(float y, float x) {
401    return atan2(y, x) / M_PI;
402}
403FN_FUNC_FN_FN(atan2pi)
404
405extern float __attribute__((overloadable)) SC_cbrtf(float);
406float __attribute__((overloadable)) cbrt(float v) {
407    return SC_cbrtf(v);
408}
409FN_FUNC_FN(cbrt)
410
411extern float __attribute__((overloadable)) SC_ceilf(float);
412float __attribute__((overloadable)) ceil(float v) {
413    return SC_ceilf(v);
414}
415FN_FUNC_FN(ceil)
416
417extern float __attribute__((overloadable)) SC_copysignf(float, float);
418float __attribute__((overloadable)) copysign(float v1, float v2) {
419    return SC_copysignf(v1, v2);
420}
421FN_FUNC_FN_FN(copysign)
422
423extern float __attribute__((overloadable)) SC_cosf(float);
424float __attribute__((overloadable)) cos(float v) {
425    return SC_cosf(v);
426}
427FN_FUNC_FN(cos)
428
429extern float __attribute__((overloadable)) SC_coshf(float);
430float __attribute__((overloadable)) cosh(float v) {
431    return SC_coshf(v);
432}
433FN_FUNC_FN(cosh)
434
435extern float __attribute__((overloadable)) cospi(float v) {
436    return cos(v * M_PI);
437}
438FN_FUNC_FN(cospi)
439
440extern float __attribute__((overloadable)) SC_erfcf(float);
441float __attribute__((overloadable)) erfc(float v) {
442    return SC_erfcf(v);
443}
444FN_FUNC_FN(erfc)
445
446extern float __attribute__((overloadable)) SC_erff(float);
447float __attribute__((overloadable)) erf(float v) {
448    return SC_erff(v);
449}
450FN_FUNC_FN(erf)
451
452extern float __attribute__((overloadable)) SC_expf(float);
453float __attribute__((overloadable)) exp(float v) {
454    return SC_expf(v);
455}
456FN_FUNC_FN(exp)
457
458extern float __attribute__((overloadable)) SC_exp2f(float);
459float __attribute__((overloadable)) exp2(float v) {
460    return SC_exp2f(v);
461}
462FN_FUNC_FN(exp2)
463
464extern float __attribute__((overloadable)) pow(float, float);
465
466extern float __attribute__((overloadable)) exp10(float v) {
467    return exp2(v * 3.321928095f);
468}
469FN_FUNC_FN(exp10)
470
471extern float __attribute__((overloadable)) SC_expm1f(float);
472float __attribute__((overloadable)) expm1(float v) {
473    return SC_expm1f(v);
474}
475FN_FUNC_FN(expm1)
476
477extern float __attribute__((overloadable)) fabs(float v) {
478    int i = *((int*)(void*)&v) & 0x7fffffff;
479    return  *((float*)(void*)&i);
480}
481FN_FUNC_FN(fabs)
482
483extern float __attribute__((overloadable)) SC_fdimf(float, float);
484float __attribute__((overloadable)) fdim(float v1, float v2) {
485    return SC_fdimf(v1, v2);
486}
487FN_FUNC_FN_FN(fdim)
488
489extern float __attribute__((overloadable)) SC_floorf(float);
490float __attribute__((overloadable)) floor(float v) {
491    return SC_floorf(v);
492}
493FN_FUNC_FN(floor)
494
495extern float __attribute__((overloadable)) SC_fmaf(float, float, float);
496float __attribute__((overloadable)) fma(float v1, float v2, float v3) {
497    return SC_fmaf(v1, v2, v3);
498}
499FN_FUNC_FN_FN_FN(fma)
500
501extern float __attribute__((overloadable)) SC_fminf(float, float);
502
503extern float __attribute__((overloadable)) SC_fmodf(float, float);
504float __attribute__((overloadable)) fmod(float v1, float v2) {
505    return SC_fmodf(v1, v2);
506}
507FN_FUNC_FN_FN(fmod)
508
509extern float __attribute__((overloadable)) fract(float v, float *iptr) {
510    int i = (int)floor(v);
511    if (iptr) {
512        iptr[0] = i;
513    }
514    return fmin(v - i, 0x1.fffffep-1f);
515}
516FN_FUNC_FN_PFN(fract)
517
518extern float __attribute__((const, overloadable)) fract(float v) {
519    float unused;
520    return fract(v, &unused);
521}
522FN_FUNC_FN(fract)
523
524extern float __attribute__((overloadable)) SC_frexpf(float, int *);
525float __attribute__((overloadable)) frexp(float v1, int* v2) {
526    return SC_frexpf(v1, v2);
527}
528FN_FUNC_FN_PIN(frexp)
529
530extern float __attribute__((overloadable)) SC_hypotf(float, float);
531float __attribute__((overloadable)) hypot(float v1, float v2) {
532    return SC_hypotf(v1, v2);
533}
534FN_FUNC_FN_FN(hypot)
535
536extern int __attribute__((overloadable)) SC_ilogbf(float);
537int __attribute__((overloadable)) ilogb(float v) {
538    return SC_ilogbf(v);
539}
540IN_FUNC_FN(ilogb)
541
542extern float __attribute__((overloadable)) SC_ldexpf(float, int);
543float __attribute__((overloadable)) ldexp(float v1, int v2) {
544    return SC_ldexpf(v1, v2);
545}
546FN_FUNC_FN_IN(ldexp)
547FN_FUNC_FN_I(ldexp)
548
549extern float __attribute__((overloadable)) SC_lgammaf(float);
550float __attribute__((overloadable)) lgamma(float v) {
551    return SC_lgammaf(v);
552}
553FN_FUNC_FN(lgamma)
554extern float __attribute__((overloadable)) SC_lgammaf_r(float, int*);
555float __attribute__((overloadable)) lgamma(float v, int* ptr) {
556    return SC_lgammaf_r(v, ptr);
557}
558FN_FUNC_FN_PIN(lgamma)
559
560extern float __attribute__((overloadable)) SC_logf(float);
561float __attribute__((overloadable)) log(float v) {
562    return SC_logf(v);
563}
564FN_FUNC_FN(log)
565
566extern float __attribute__((overloadable)) SC_log10f(float);
567float __attribute__((overloadable)) log10(float v) {
568    return SC_log10f(v);
569}
570FN_FUNC_FN(log10)
571
572
573extern float __attribute__((overloadable)) log2(float v) {
574    return log10(v) * 3.321928095f;
575}
576FN_FUNC_FN(log2)
577
578extern float __attribute__((overloadable)) SC_log1pf(float);
579float __attribute__((overloadable)) log1p(float v) {
580    return SC_log1pf(v);
581}
582FN_FUNC_FN(log1p)
583
584extern float __attribute__((overloadable)) SC_logbf(float);
585float __attribute__((overloadable)) logb(float v) {
586    return SC_logbf(v);
587}
588FN_FUNC_FN(logb)
589
590extern float __attribute__((overloadable)) mad(float a, float b, float c) {
591    return a * b + c;
592}
593extern float2 __attribute__((overloadable)) mad(float2 a, float2 b, float2 c) {
594    return a * b + c;
595}
596extern float3 __attribute__((overloadable)) mad(float3 a, float3 b, float3 c) {
597    return a * b + c;
598}
599extern float4 __attribute__((overloadable)) mad(float4 a, float4 b, float4 c) {
600    return a * b + c;
601}
602
603extern float __attribute__((overloadable)) SC_modff(float, float *);
604float __attribute__((overloadable)) modf(float v1, float *v2) {
605    return SC_modff(v1, v2);
606}
607FN_FUNC_FN_PFN(modf);
608
609extern float __attribute__((overloadable)) nan(uint v) {
610    float f[1];
611    uint32_t *ip = (uint32_t *)f;
612    *ip = v | 0x7fc00000;
613    return f[0];
614}
615
616extern float __attribute__((overloadable)) SC_nextafterf(float, float);
617float __attribute__((overloadable)) nextafter(float v1, float v2) {
618    return SC_nextafterf(v1, v2);
619}
620FN_FUNC_FN_FN(nextafter)
621
622// This function must be defined here if we're compiling with debug info
623// (libclcore_g.bc), because we need a C source to get debug information.
624// Otherwise the implementation can be found in IR.
625#if defined(RS_G_RUNTIME)
626extern float __attribute__((overloadable)) SC_powf(float, float);
627float __attribute__((overloadable)) pow(float v1, float v2) {
628    return SC_powf(v1, v2);
629}
630#endif // defined(RS_G_RUNTIME)
631FN_FUNC_FN_FN(pow)
632
633extern float __attribute__((overloadable)) pown(float v, int p) {
634    /* The mantissa of a float has fewer bits than an int (24 effective vs. 31).
635     * For very large ints, we'll lose whether the exponent is even or odd, making
636     * the selection of a correct sign incorrect.  We correct this.  Use copysign
637     * to handle the negative zero case.
638     */
639    float sign = (p & 0x1) ? copysign(1.f, v) : 1.f;
640    float f = pow(v, (float)p);
641    return copysign(f, sign);
642}
643FN_FUNC_FN_IN(pown)
644
645extern float __attribute__((overloadable)) powr(float v, float p) {
646    return pow(v, p);
647}
648extern float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
649    return pow(v, p);
650}
651extern float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
652    return pow(v, p);
653}
654extern float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
655    return pow(v, p);
656}
657
658extern float __attribute__((overloadable)) SC_remainderf(float, float);
659float __attribute__((overloadable)) remainder(float v1, float v2) {
660    return SC_remainderf(v1, v2);
661}
662FN_FUNC_FN_FN(remainder)
663
664extern float __attribute__((overloadable)) SC_remquof(float, float, int *);
665float __attribute__((overloadable)) remquo(float v1, float v2, int *v3) {
666    return SC_remquof(v1, v2, v3);
667}
668FN_FUNC_FN_FN_PIN(remquo)
669
670extern float __attribute__((overloadable)) SC_rintf(float);
671float __attribute__((overloadable)) rint(float v) {
672    return SC_rintf(v);
673}
674FN_FUNC_FN(rint)
675
676extern float __attribute__((overloadable)) rootn(float v, int r) {
677    if (r == 0) {
678        return posinf();
679    }
680
681    if (iszero(v)) {
682        if (r < 0) {
683            if (r & 1) {
684                return copysign(posinf(), v);
685            } else {
686                return posinf();
687            }
688        } else {
689            if (r & 1) {
690                return copysign(0.f, v);
691            } else {
692                return 0.f;
693            }
694        }
695    }
696
697    if (!isinf(v) && !isnan(v) && (v < 0.f)) {
698        if (r & 1) {
699            return (-1.f * pow(-1.f * v, 1.f / r));
700        } else {
701            return nan(0);
702        }
703    }
704
705    return pow(v, 1.f / r);
706}
707FN_FUNC_FN_IN(rootn);
708
709extern float __attribute__((overloadable)) SC_roundf(float);
710float __attribute__((overloadable)) round(float v) {
711    return SC_roundf(v);
712}
713FN_FUNC_FN(round)
714
// rsRand(min, max) forwards both bounds, in order, to the externally declared
// SC_randf2.
extern float __attribute__((overloadable)) SC_randf2(float lo, float hi);
float __attribute__((overloadable)) rsRand(float min, float max) {
    const float value = SC_randf2(min, max);
    return value;
}
719
720
// rsqrt(v) is 1 divided by sqrt(v).
extern float __attribute__((overloadable)) rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
724
725#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
726// These functions must be defined here if we are not using the SSE
727// implementation, which includes when we are built as part of the
728// debug runtime (libclcore_debug.bc) or compiling with debug info.
729#if defined(RS_G_RUNTIME)
730extern float __attribute__((overloadable)) SC_sqrtf(float);
731float __attribute__((overloadable)) sqrt(float v) {
732    return SC_sqrtf(v);
733}
734#endif // defined(RS_G_RUNTIME)
735
736FN_FUNC_FN(sqrt)
737#else
738extern float2 __attribute__((overloadable)) sqrt(float2);
739extern float3 __attribute__((overloadable)) sqrt(float3);
740extern float4 __attribute__((overloadable)) sqrt(float4);
741#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
742
// Vector rsqrt overloads, generated component-wise from the scalar rsqrt().
FN_FUNC_FN(rsqrt)
744
// sin(v): the scalar overload forwards to SC_sinf, which is only declared
// here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_sinf(float);
float __attribute__((overloadable)) sin(float v) {
    return SC_sinf(v);
}
// Vector overloads, generated component-wise from the scalar sin().
FN_FUNC_FN(sin)
750
751extern float __attribute__((overloadable)) sincos(float v, float *cosptr) {
752    *cosptr = cos(v);
753    return sin(v);
754}
755extern float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
756    *cosptr = cos(v);
757    return sin(v);
758}
759extern float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
760    *cosptr = cos(v);
761    return sin(v);
762}
763extern float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
764    *cosptr = cos(v);
765    return sin(v);
766}
767
// sinh(v): the scalar overload forwards to SC_sinhf, which is only declared
// here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_sinhf(float);
float __attribute__((overloadable)) sinh(float v) {
    return SC_sinhf(v);
}
// Vector overloads, generated component-wise from the scalar sinh().
FN_FUNC_FN(sinh)
773
// sinpi(v): sine of (v * pi). The argument is scaled by M_PI and handed to
// sin(); no separate range reduction is performed in this function.
// NOTE(review): M_PI is defined outside this section -- confirm its type if
// the precision of the product matters.
extern float __attribute__((overloadable)) sinpi(float v) {
    return sin(v * M_PI);
}
// Vector overloads, generated component-wise from the scalar sinpi().
FN_FUNC_FN(sinpi)
778
// tan(v): the scalar overload forwards to SC_tanf, which is only declared
// here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_tanf(float);
float __attribute__((overloadable)) tan(float v) {
    return SC_tanf(v);
}
// Vector overloads, generated component-wise from the scalar tan().
FN_FUNC_FN(tan)
784
// tanh(v): the scalar overload forwards to SC_tanhf, which is only declared
// here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_tanhf(float);
float __attribute__((overloadable)) tanh(float v) {
    return SC_tanhf(v);
}
// Vector overloads, generated component-wise from the scalar tanh().
FN_FUNC_FN(tanh)
790
// tanpi(v): tangent of (v * pi). The argument is scaled by M_PI and handed
// to tan(); no separate range reduction is performed in this function.
// NOTE(review): M_PI is defined outside this section -- confirm its type if
// the precision of the product matters.
extern float __attribute__((overloadable)) tanpi(float v) {
    return tan(v * M_PI);
}
// Vector overloads, generated component-wise from the scalar tanpi().
FN_FUNC_FN(tanpi)
795
796
// tgamma(v): the scalar overload forwards to SC_tgammaf, which is only
// declared here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_tgammaf(float);
float __attribute__((overloadable)) tgamma(float v) {
    return SC_tgammaf(v);
}
// Vector overloads, generated component-wise from the scalar tgamma().
FN_FUNC_FN(tgamma)
802
// trunc(v): the scalar overload forwards to SC_truncf, which is only declared
// here (its definition is not part of this file section).
extern float __attribute__((overloadable)) SC_truncf(float);
float __attribute__((overloadable)) trunc(float v) {
    return SC_truncf(v);
}
// Vector overloads, generated component-wise from the scalar trunc().
FN_FUNC_FN(trunc)
808
809// Int ops (partial), 6.11.3
810
/*
 * XN_FUNC_YN(typeout, fnc, typein)
 *
 * Declares the scalar overload `typeout fnc(typein)` (it must be defined
 * separately) and defines the 2-, 3- and 4-wide vector overloads, each of
 * which applies fnc to every component of its argument and stores the result
 * in the matching component of the return value.
 *
 *   typeout - element type of the result; typeout##2/3/4 must exist
 *   fnc     - function name being overloaded
 *   typein  - element type of the argument; typein##2/3/4 must exist
 */
#define XN_FUNC_YN(typeout, fnc, typein)                                \
extern typeout __attribute__((overloadable)) fnc(typein);               \
extern typeout##2 __attribute__((overloadable)) fnc(typein##2 v) {  \
    typeout##2 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##3 __attribute__((overloadable)) fnc(typein##3 v) {  \
    typeout##3 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    return r;                                                           \
}                                                                       \
extern typeout##4 __attribute__((overloadable)) fnc(typein##4 v) {  \
    typeout##4 r;                                                       \
    r.x = fnc(v.x);                                                     \
    r.y = fnc(v.y);                                                     \
    r.z = fnc(v.z);                                                     \
    r.w = fnc(v.w);                                                     \
    return r;                                                           \
}
834
835
/*
 * UIN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions that take a signed
 * integer and return the unsigned type of the same width
 * (char -> uchar, short -> ushort, int -> uint).
 */
#define UIN_FUNC_IN(fnc)          \
XN_FUNC_YN(uchar, fnc, char)      \
XN_FUNC_YN(ushort, fnc, short)    \
XN_FUNC_YN(uint, fnc, int)
840
/*
 * IN_FUNC_IN(fnc): instantiates XN_FUNC_YN for functions whose result type
 * equals the argument type, for each of the six 8/16/32-bit integer types
 * (uchar, char, ushort, short, uint, int).
 */
#define IN_FUNC_IN(fnc)           \
XN_FUNC_YN(uchar, fnc, uchar)     \
XN_FUNC_YN(char, fnc, char)       \
XN_FUNC_YN(ushort, fnc, ushort)   \
XN_FUNC_YN(short, fnc, short)     \
XN_FUNC_YN(uint, fnc, uint)       \
XN_FUNC_YN(int, fnc, int)
848
849
/*
 * XN_FUNC_XN_XN_BODY(type, fnc, body)
 *
 * Defines a two-argument function fnc for `type` and for its 2-, 3- and
 * 4-wide vectors.
 *
 *   type - element type; type##2/3/4 must exist
 *   fnc  - function name being defined
 *   body - expression returned by the scalar overload; it may refer to the
 *          parameters by their names v1 and v2
 *
 * The vector overloads do not expand `body` themselves: they call fnc on each
 * pair of components.
 */
#define XN_FUNC_XN_XN_BODY(type, fnc, body)         \
extern type __attribute__((overloadable))       \
        fnc(type v1, type v2) {                     \
    return body;                                    \
}                                                   \
extern type##2 __attribute__((overloadable))    \
        fnc(type##2 v1, type##2 v2) {               \
    type##2 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    return r;                                       \
}                                                   \
extern type##3 __attribute__((overloadable))    \
        fnc(type##3 v1, type##3 v2) {               \
    type##3 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    return r;                                       \
}                                                   \
extern type##4 __attribute__((overloadable))    \
        fnc(type##4 v1, type##4 v2) {               \
    type##4 r;                                      \
    r.x = fnc(v1.x, v2.x);                          \
    r.y = fnc(v1.y, v2.y);                          \
    r.z = fnc(v1.z, v2.z);                          \
    r.w = fnc(v1.w, v2.w);                          \
    return r;                                       \
}
879
/*
 * IN_FUNC_IN_IN_BODY(fnc, body): instantiates XN_FUNC_XN_XN_BODY with the
 * same body expression for uchar, char, ushort, short, uint, int and, despite
 * the integer-sounding name, float as well.
 */
#define IN_FUNC_IN_IN_BODY(fnc, body) \
XN_FUNC_XN_XN_BODY(uchar, fnc, body)  \
XN_FUNC_XN_XN_BODY(char, fnc, body)   \
XN_FUNC_XN_XN_BODY(ushort, fnc, body) \
XN_FUNC_XN_XN_BODY(short, fnc, body)  \
XN_FUNC_XN_XN_BODY(uint, fnc, body)   \
XN_FUNC_XN_XN_BODY(int, fnc, body)    \
XN_FUNC_XN_XN_BODY(float, fnc, body)
888
889
890/**
891 * abs
892 */
893extern uint32_t __attribute__((overloadable)) abs(int32_t v) {
894    if (v < 0)
895        return -v;
896    return v;
897}
/*
 * abs(int16_t): absolute value, returned as an unsigned 16-bit integer.
 * The operand is promoted to int before negation, so -32768 maps to 32768.
 */
extern uint16_t __attribute__((overloadable)) abs(int16_t v) {
    return (v < 0) ? -v : v;
}
/*
 * abs(int8_t): absolute value, returned as an unsigned 8-bit integer.
 * The operand is promoted to int before negation, so -128 maps to 128.
 */
extern uint8_t __attribute__((overloadable)) abs(int8_t v) {
    return (v < 0) ? -v : v;
}
908
909/**
910 * clz
911 * __builtin_clz only accepts a 32-bit unsigned int, so every input will be
912 * expanded to 32 bits. For our smaller data types, we need to subtract off
 * these unused top bits (that will always be composed of zeros).
914 */
/*
 * clz(uint32_t): number of leading zero bits in a 32-bit value.
 * __builtin_clz has an undefined result for 0, so that input is answered
 * explicitly: all 32 bits of a zero word are leading zeros.
 */
extern uint32_t __attribute__((overloadable)) clz(uint32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz(v);
}
/*
 * clz(uint16_t): number of leading zero bits in a 16-bit value.
 * The value is widened to 32 bits for __builtin_clz, so the 16 extra top
 * bits are subtracted. __builtin_clz has an undefined result for 0, so that
 * input is answered explicitly with the full width, 16.
 */
extern uint16_t __attribute__((overloadable)) clz(uint16_t v) {
    if (v == 0) {
        return 16;
    }
    return __builtin_clz(v) - 16;
}
/*
 * clz(uint8_t): number of leading zero bits in an 8-bit value.
 * The value is widened to 32 bits for __builtin_clz, so the 24 extra top
 * bits are subtracted. __builtin_clz has an undefined result for 0, so that
 * input is answered explicitly with the full width, 8.
 */
extern uint8_t __attribute__((overloadable)) clz(uint8_t v) {
    if (v == 0) {
        return 8;
    }
    return __builtin_clz(v) - 24;
}
/*
 * clz(int32_t): number of leading zero bits in the 32-bit two's-complement
 * representation of v (so any negative value gives 0).
 * __builtin_clz has an undefined result for 0, so that input is answered
 * explicitly with the full width, 32.
 */
extern int32_t __attribute__((overloadable)) clz(int32_t v) {
    if (v == 0) {
        return 32;
    }
    return __builtin_clz((uint32_t)v);
}
/*
 * clz(int16_t): number of leading zero bits in the 16-bit representation
 * of v. The value is masked to its low 16 bits so that sign extension does
 * not leak into the count, then the 16 extra top bits of the 32-bit builtin
 * are subtracted. __builtin_clz has an undefined result for 0, so that
 * input is answered explicitly with the full width, 16.
 */
extern int16_t __attribute__((overloadable)) clz(int16_t v) {
    const uint32_t bits = ((uint32_t)v) & 0x0000ffff;
    if (bits == 0) {
        return 16;
    }
    return __builtin_clz(bits) - 16;
}
/*
 * clz(int8_t): number of leading zero bits in the 8-bit representation
 * of v. The value is masked to its low 8 bits so that sign extension does
 * not leak into the count, then the 24 extra top bits of the 32-bit builtin
 * are subtracted. __builtin_clz has an undefined result for 0, so that
 * input is answered explicitly with the full width, 8.
 */
extern int8_t __attribute__((overloadable)) clz(int8_t v) {
    const uint32_t bits = ((uint32_t)v) & 0x000000ff;
    if (bits == 0) {
        return 8;
    }
    return __builtin_clz(bits) - 24;
}
933
934
// Vector overloads of abs (signed in, unsigned out) and clz (same type in
// and out), generated component-wise from the scalar overloads defined above.
UIN_FUNC_IN(abs)
IN_FUNC_IN(clz)
937
938
939// 6.11.4
940
941
/*
 * degrees(radians): converts an angle from radians to degrees by multiplying
 * by 180 / pi. The vector overloads multiply the whole vector by that same
 * scalar factor, so every component is converted.
 */
extern float __attribute__((overloadable)) degrees(float radians) {
    return radians * (180.f / M_PI);
}
extern float2 __attribute__((overloadable)) degrees(float2 radians) {
    return radians * (180.f / M_PI);
}
extern float3 __attribute__((overloadable)) degrees(float3 radians) {
    return radians * (180.f / M_PI);
}
extern float4 __attribute__((overloadable)) degrees(float4 radians) {
    return radians * (180.f / M_PI);
}
954
955extern float __attribute__((overloadable)) mix(float start, float stop, float amount) {
956    return start + (stop - start) * amount;
957}
958extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
959    return start + (stop - start) * amount;
960}
961extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
962    return start + (stop - start) * amount;
963}
964extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
965    return start + (stop - start) * amount;
966}
967extern float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
968    return start + (stop - start) * amount;
969}
970extern float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
971    return start + (stop - start) * amount;
972}
973extern float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
974    return start + (stop - start) * amount;
975}
976
/*
 * radians(degrees): converts an angle from degrees to radians by multiplying
 * by pi / 180. The vector overloads multiply the whole vector by that same
 * scalar factor, so every component is converted.
 */
extern float __attribute__((overloadable)) radians(float degrees) {
    return degrees * (M_PI / 180.f);
}
extern float2 __attribute__((overloadable)) radians(float2 degrees) {
    return degrees * (M_PI / 180.f);
}
extern float3 __attribute__((overloadable)) radians(float3 degrees) {
    return degrees * (M_PI / 180.f);
}
extern float4 __attribute__((overloadable)) radians(float4 degrees) {
    return degrees * (M_PI / 180.f);
}
989
990extern float __attribute__((overloadable)) step(float edge, float v) {
991    return (v < edge) ? 0.f : 1.f;
992}
993extern float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
994    float2 r;
995    r.x = (v.x < edge.x) ? 0.f : 1.f;
996    r.y = (v.y < edge.y) ? 0.f : 1.f;
997    return r;
998}
999extern float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
1000    float3 r;
1001    r.x = (v.x < edge.x) ? 0.f : 1.f;
1002    r.y = (v.y < edge.y) ? 0.f : 1.f;
1003    r.z = (v.z < edge.z) ? 0.f : 1.f;
1004    return r;
1005}
1006extern float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
1007    float4 r;
1008    r.x = (v.x < edge.x) ? 0.f : 1.f;
1009    r.y = (v.y < edge.y) ? 0.f : 1.f;
1010    r.z = (v.z < edge.z) ? 0.f : 1.f;
1011    r.w = (v.w < edge.w) ? 0.f : 1.f;
1012    return r;
1013}
1014extern float2 __attribute__((overloadable)) step(float2 edge, float v) {
1015    float2 r;
1016    r.x = (v < edge.x) ? 0.f : 1.f;
1017    r.y = (v < edge.y) ? 0.f : 1.f;
1018    return r;
1019}
1020extern float3 __attribute__((overloadable)) step(float3 edge, float v) {
1021    float3 r;
1022    r.x = (v < edge.x) ? 0.f : 1.f;
1023    r.y = (v < edge.y) ? 0.f : 1.f;
1024    r.z = (v < edge.z) ? 0.f : 1.f;
1025    return r;
1026}
1027extern float4 __attribute__((overloadable)) step(float4 edge, float v) {
1028    float4 r;
1029    r.x = (v < edge.x) ? 0.f : 1.f;
1030    r.y = (v < edge.y) ? 0.f : 1.f;
1031    r.z = (v < edge.z) ? 0.f : 1.f;
1032    r.w = (v < edge.w) ? 0.f : 1.f;
1033    return r;
1034}
1035extern float2 __attribute__((overloadable)) step(float edge, float2 v) {
1036    float2 r;
1037    r.x = (v.x < edge) ? 0.f : 1.f;
1038    r.y = (v.y < edge) ? 0.f : 1.f;
1039    return r;
1040}
1041extern float3 __attribute__((overloadable)) step(float edge, float3 v) {
1042    float3 r;
1043    r.x = (v.x < edge) ? 0.f : 1.f;
1044    r.y = (v.y < edge) ? 0.f : 1.f;
1045    r.z = (v.z < edge) ? 0.f : 1.f;
1046    return r;
1047}
1048extern float4 __attribute__((overloadable)) step(float edge, float4 v) {
1049    float4 r;
1050    r.x = (v.x < edge) ? 0.f : 1.f;
1051    r.y = (v.y < edge) ? 0.f : 1.f;
1052    r.z = (v.z < edge) ? 0.f : 1.f;
1053    r.w = (v.w < edge) ? 0.f : 1.f;
1054    return r;
1055}
1056
/*
 * sign(v): 1.f for positive v, -1.f for negative v. Any other input (both
 * zeros, NaN) fails both comparisons and is returned unchanged.
 */
extern float __attribute__((overloadable)) sign(float v) {
    float result = v;
    if (v > 0) {
        result = 1.f;
    } else if (v < 0) {
        result = -1.f;
    }
    return result;
}
// Vector sign overloads, generated component-wise from the scalar sign().
FN_FUNC_FN(sign)
1063
1064
1065// 6.11.5
1066extern float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
1067    float3 r;
1068    r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1069    r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1070    r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1071    return r;
1072}
1073
1074extern float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
1075    float4 r;
1076    r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1077    r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1078    r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1079    r.w = 0.f;
1080    return r;
1081}
1082
#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
// These functions must be defined here if we are not using the SSE
// implementation, which includes when we are built as part of the
// debug runtime (libclcore_debug.bc) or compiling with debug info.

// dot(lhs, rhs): sum of the component-wise products. For scalars this is
// just the product.
extern float __attribute__((overloadable)) dot(float lhs, float rhs) {
    return lhs * rhs;
}
extern float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
    return lhs.x*rhs.x + lhs.y*rhs.y;
}
extern float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
    return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
}
extern float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
    return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
}

// length(v): Euclidean norm, i.e. the square root of the sum of squared
// components. For scalars this is fabs(v). The squares are summed directly,
// with no rescaling against overflow or underflow.
extern float __attribute__((overloadable)) length(float v) {
    return fabs(v);
}
extern float __attribute__((overloadable)) length(float2 v) {
    return sqrt(v.x*v.x + v.y*v.y);
}
extern float __attribute__((overloadable)) length(float3 v) {
    return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
}
extern float __attribute__((overloadable)) length(float4 v) {
    return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
}

#else

// In this configuration length() is only declared (dot() is not mentioned at
// all in this branch); the declarations are needed because distance() and
// normalize() below call length().
extern float __attribute__((overloadable)) length(float v);
extern float __attribute__((overloadable)) length(float2 v);
extern float __attribute__((overloadable)) length(float3 v);
extern float __attribute__((overloadable)) length(float4 v);

#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
1122
1123extern float __attribute__((overloadable)) distance(float lhs, float rhs) {
1124    return length(lhs - rhs);
1125}
1126extern float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
1127    return length(lhs - rhs);
1128}
1129extern float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
1130    return length(lhs - rhs);
1131}
1132extern float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
1133    return length(lhs - rhs);
1134}
1135
1136/* For the normalization functions, vectors of length 0 should simply be
1137 * returned (i.e. all the components of that vector are 0).
1138 */
/*
 * normalize(float): "unit vector" of a scalar: -1.0f for negative input,
 * 0.0f for either zero, and 1.0f for everything else (which, as written,
 * includes NaN because both comparisons fail).
 */
extern float __attribute__((overloadable)) normalize(float v) {
    if (v < 0.0f) {
        return -1.0f;
    }
    return (v == 0.0f) ? 0.0f : 1.0f;
}
1148extern float2 __attribute__((overloadable)) normalize(float2 v) {
1149    float l = length(v);
1150    return l == 0.0f ? v : v / l;
1151}
1152extern float3 __attribute__((overloadable)) normalize(float3 v) {
1153    float l = length(v);
1154    return l == 0.0f ? v : v / l;
1155}
1156extern float4 __attribute__((overloadable)) normalize(float4 v) {
1157    float l = length(v);
1158    return l == 0.0f ? v : v / l;
1159}
1160
// half_sqrt(v): in this implementation simply an alias for sqrt(v); no
// reduced-precision path is taken here.
extern float __attribute__((overloadable)) half_sqrt(float v) {
    return sqrt(v);
}
// Vector overloads, generated component-wise from the scalar half_sqrt().
FN_FUNC_FN(half_sqrt)
1165
1166extern float __attribute__((overloadable)) fast_length(float v) {
1167    return fabs(v);
1168}
1169extern float __attribute__((overloadable)) fast_length(float2 v) {
1170    return half_sqrt(v.x*v.x + v.y*v.y);
1171}
1172extern float __attribute__((overloadable)) fast_length(float3 v) {
1173    return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1174}
1175extern float __attribute__((overloadable)) fast_length(float4 v) {
1176    return half_sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1177}
1178
1179extern float __attribute__((overloadable)) fast_distance(float lhs, float rhs) {
1180    return fast_length(lhs - rhs);
1181}
1182extern float __attribute__((overloadable)) fast_distance(float2 lhs, float2 rhs) {
1183    return fast_length(lhs - rhs);
1184}
1185extern float __attribute__((overloadable)) fast_distance(float3 lhs, float3 rhs) {
1186    return fast_length(lhs - rhs);
1187}
1188extern float __attribute__((overloadable)) fast_distance(float4 lhs, float4 rhs) {
1189    return fast_length(lhs - rhs);
1190}
1191
// Forward declaration: half_rsqrt is used by the fast_normalize overloads
// below but is not defined in this part of the file.
extern float __attribute__((overloadable)) half_rsqrt(float);
1193
1194/* For the normalization functions, vectors of length 0 should simply be
1195 * returned (i.e. all the components of that vector are 0).
1196 */
/*
 * fast_normalize(float): 0.0f for either zero, -1.0f for negative input and
 * 1.0f for everything else (which, as written, includes NaN because both
 * comparisons fail).
 */
extern float __attribute__((overloadable)) fast_normalize(float v) {
    float unit = 1.0f;
    if (v == 0.0f) {
        unit = 0.0f;
    } else if (v < 0.0f) {
        unit = -1.0f;
    }
    return unit;
}
1206// If the length is 0, then rlength should be NaN.
1207extern float2 __attribute__((overloadable)) fast_normalize(float2 v) {
1208    float rlength = half_rsqrt(v.x*v.x + v.y*v.y);
1209    return (rlength == rlength) ? v * rlength : v;
1210}
1211extern float3 __attribute__((overloadable)) fast_normalize(float3 v) {
1212    float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z);
1213    return (rlength == rlength) ? v * rlength : v;
1214}
1215extern float4 __attribute__((overloadable)) fast_normalize(float4 v) {
1216    float rlength = half_rsqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
1217    return (rlength == rlength) ? v * rlength : v;
1218}
1219
// half_recip(v): reciprocal of v, computed with an ordinary float division.
// A zero argument is not special-cased.
extern float __attribute__((overloadable)) half_recip(float v) {
    return 1.f / v;
}
1223
1224/*
1225extern float __attribute__((overloadable)) approx_atan(float x) {
1226    if (x == 0.f)
1227        return 0.f;
1228    if (x < 0.f)
1229        return -1.f * approx_atan(-1.f * x);
1230    if (x > 1.f)
1231        return M_PI_2 - approx_atan(approx_recip(x));
1232    return x * approx_recip(1.f + 0.28f * x*x);
1233}
1234FN_FUNC_FN(approx_atan)
1235*/
1236
/*
 * Overlay of a float and a 32-bit integer sharing the same storage. Used by
 * GET_FLOAT_WORD / SET_FLOAT_WORD to read or write the raw bit pattern of a
 * float.
 */
typedef union
{
  float fv;    /* value viewed as a float */
  int32_t iv;  /* the same 32 bits viewed as a signed integer */
} ieee_float_shape_type;
1242
/*
 * GET_FLOAT_WORD(i, d): stores the raw 32-bit pattern of float expression d
 * into integer lvalue i, going through ieee_float_shape_type. Each argument
 * is evaluated exactly once. Usable as a single statement (do/while(0)).
 */

#define GET_FLOAT_WORD(i,d)                 \
do {                                \
  ieee_float_shape_type gf_u;                   \
  gf_u.fv = (d);                     \
  (i) = gf_u.iv;                      \
} while (0)
1251
/*
 * SET_FLOAT_WORD(d, i): stores into float lvalue d the float whose raw
 * 32-bit pattern is integer expression i, going through
 * ieee_float_shape_type. Each argument is evaluated exactly once. Usable as
 * a single statement (do/while(0)).
 */

#define SET_FLOAT_WORD(d,i)                 \
do {                                \
  ieee_float_shape_type sf_u;                   \
  sf_u.iv = (i);                      \
  (d) = sf_u.fv;                     \
} while (0)
1260
1261
1262
// native_exp2(v): fast approximation of 2^v, split as 2^x * 2^r with x an
// integer and r the remainder.
// Valid -125 to 125. Outside that range (x + 127) no longer fits the 8-bit
// exponent field and the constructed float is meaningless.
extern float __attribute__((overloadable)) native_exp2(float v) {
    // Truncate toward zero, then subtract 1 when the truncated value is
    // negative (iv >> 31 is -1 for negative iv, assuming an arithmetic
    // shift). This only approximates floor(v): r = v - x is not confined to
    // [0, 1), e.g. v in (-1, 0) keeps x == 0 and a negative r.
    int32_t iv = (int)v;
    int32_t x = iv + (iv >> 31); // ~floor(v)
    float r = (v - x);

    // Build 2^x directly: biased exponent (x + 127) placed in bits 23..30,
    // sign and mantissa bits zero.
    float fo;
    SET_FLOAT_WORD(fo, (x + 127) << 23);

    // 2^r == e^(r * ln 2). Scale r by ~ln(2) and evaluate the 4th-degree
    // Taylor polynomial of e^t: 1 + t + t^2/2 + t^3/6 + t^4/24.
    r *= 0.694f; // ~ ln(2) == log(2) / log(e)
    float r2 = r*r;
    float adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}
1277
// Vector native_exp2: the same algorithm as the scalar overload (integer
// part turned into an exponent field, remainder fed to a 4th-degree Taylor
// polynomial of e^t after scaling by ~ln 2), applied to all lanes at once.
// The same input-range restriction applies.
extern float2 __attribute__((overloadable)) native_exp2(float2 v) {
    // Per-lane truncation, minus 1 in lanes where the truncated value is
    // negative (an approximation of floor).
    int2 iv = convert_int2(v);
    int2 x = iv + (iv >> (int2)31);//floor(v);
    float2 r = (v - convert_float2(x));

    // Bias the exponent.
    x += 127;

    // NOTE(review): this cast is presumably a bit reinterpretation of the
    // int lanes as floats (the vector counterpart of SET_FLOAT_WORD), not a
    // numeric conversion -- confirm the cast semantics for these vector types.
    float2 fo = (float2)(x << (int2)23);

    r *= 0.694f; // ~ ln(2) == log(2) / log(e)
    float2 r2 = r*r;
    float2 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}

extern float4 __attribute__((overloadable)) native_exp2(float4 v) {
    // Per-lane truncation, minus 1 in lanes where the truncated value is
    // negative (an approximation of floor).
    int4 iv = convert_int4(v);
    int4 x = iv + (iv >> (int4)31);//floor(v);
    float4 r = (v - convert_float4(x));

    // Bias the exponent.
    x += 127;

    // NOTE(review): presumably a bit reinterpretation, as in the float2
    // overload -- confirm.
    float4 fo = (float4)(x << (int4)23);

    r *= 0.694f; // ~ ln(2) == log(2) / log(e)
    float4 r2 = r*r;
    float4 adj = 1.f + r + (r2 * 0.5f) + (r2*r * 0.166666f) + (r2*r2 * 0.0416666f);
    return fo * adj;
}

// float3 overload: widens to float4 (w lane set to 1.f, a value inside the
// valid range), reuses the float4 implementation and drops the w lane.
extern float3 __attribute__((overloadable)) native_exp2(float3 v) {
    float4 t = 1.f;
    t.xyz = v;
    return native_exp2(t).xyz;
}
1313
1314
1315extern float __attribute__((overloadable)) native_exp(float v) {
1316    return native_exp2(v * 1.442695041f);
1317}
1318extern float2 __attribute__((overloadable)) native_exp(float2 v) {
1319    return native_exp2(v * 1.442695041f);
1320}
1321extern float3 __attribute__((overloadable)) native_exp(float3 v) {
1322    return native_exp2(v * 1.442695041f);
1323}
1324extern float4 __attribute__((overloadable)) native_exp(float4 v) {
1325    return native_exp2(v * 1.442695041f);
1326}
1327
1328extern float __attribute__((overloadable)) native_exp10(float v) {
1329    return native_exp2(v * 3.321928095f);
1330}
1331extern float2 __attribute__((overloadable)) native_exp10(float2 v) {
1332    return native_exp2(v * 3.321928095f);
1333}
1334extern float3 __attribute__((overloadable)) native_exp10(float3 v) {
1335    return native_exp2(v * 3.321928095f);
1336}
1337extern float4 __attribute__((overloadable)) native_exp10(float4 v) {
1338    return native_exp2(v * 3.321928095f);
1339}
1340
// native_log2(v): fast approximation of log2(v), computed as
// (unbiased exponent of v) + log2(mantissa of v).
// The sign bit is ignored and there is no special handling for zero,
// negative, denormal, infinite or NaN inputs.
extern float __attribute__((overloadable)) native_log2(float v) {
    int32_t ibits;
    GET_FLOAT_WORD(ibits, v);

    // Biased 8-bit exponent field.
    int32_t e = (ibits >> 23) & 0xff;

    // Keep the 23 mantissa bits and force the exponent field to 127, which
    // gives the mantissa as a float in [1, 2).
    ibits &= 0x7fffff;
    ibits |= 127 << 23;

    float ir;
    SET_FLOAT_WORD(ir, ibits);
    // Centre the mantissa on 1.5 so the series argument lies in [-0.5, 0.5).
    ir -= 1.5f;
    float ir2 = ir*ir;
    // 6th-degree Taylor series of ln(x) about x = 1.5:
    //   ln(1.5) + t/1.5 - t^2/(2*1.5^2) + t^3/(3*1.5^3) - ...
    // Every coefficient is divided by ln(2) (0.693147181) to turn the
    // natural logarithm into a base-2 logarithm.
    float adj2 = (0.405465108f / 0.693147181f) +
                 ((0.666666667f / 0.693147181f) * ir) -
                 ((0.222222222f / 0.693147181f) * ir2) +
                 ((0.098765432f / 0.693147181f) * ir*ir2) -
                 ((0.049382716f / 0.693147181f) * ir2*ir2) +
                 ((0.026337449f / 0.693147181f) * ir*ir2*ir2) -
                 ((0.014631916f / 0.693147181f) * ir2*ir2*ir2);
    // Remove the exponent bias and add the mantissa contribution.
    return (float)(e - 127) + adj2;
}
1363extern float2 __attribute__((overloadable)) native_log2(float2 v) {
1364    float2 v2 = {native_log2(v.x), native_log2(v.y)};
1365    return v2;
1366}
1367extern float3 __attribute__((overloadable)) native_log2(float3 v) {
1368    float3 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z)};
1369    return v2;
1370}
1371extern float4 __attribute__((overloadable)) native_log2(float4 v) {
1372    float4 v2 = {native_log2(v.x), native_log2(v.y), native_log2(v.z), native_log2(v.w)};
1373    return v2;
1374}
1375
1376extern float __attribute__((overloadable)) native_log(float v) {
1377    return native_log2(v) * (1.f / 1.442695041f);
1378}
1379extern float2 __attribute__((overloadable)) native_log(float2 v) {
1380    return native_log2(v) * (1.f / 1.442695041f);
1381}
1382extern float3 __attribute__((overloadable)) native_log(float3 v) {
1383    return native_log2(v) * (1.f / 1.442695041f);
1384}
1385extern float4 __attribute__((overloadable)) native_log(float4 v) {
1386    return native_log2(v) * (1.f / 1.442695041f);
1387}
1388
1389extern float __attribute__((overloadable)) native_log10(float v) {
1390    return native_log2(v) * (1.f / 3.321928095f);
1391}
1392extern float2 __attribute__((overloadable)) native_log10(float2 v) {
1393    return native_log2(v) * (1.f / 3.321928095f);
1394}
1395extern float3 __attribute__((overloadable)) native_log10(float3 v) {
1396    return native_log2(v) * (1.f / 3.321928095f);
1397}
1398extern float4 __attribute__((overloadable)) native_log10(float4 v) {
1399    return native_log2(v) * (1.f / 3.321928095f);
1400}
1401
1402
1403extern float __attribute__((overloadable)) native_powr(float v, float y) {
1404    float v2 = native_log2(v);
1405    v2 = fmax(v2 * y, -125.f);
1406    return native_exp2(v2);
1407}
1408extern float2 __attribute__((overloadable)) native_powr(float2 v, float2 y) {
1409    float2 v2 = native_log2(v);
1410    v2 = fmax(v2 * y, -125.f);
1411    return native_exp2(v2);
1412}
1413extern float3 __attribute__((overloadable)) native_powr(float3 v, float3 y) {
1414    float3 v2 = native_log2(v);
1415    v2 = fmax(v2 * y, -125.f);
1416    return native_exp2(v2);
1417}
1418extern float4 __attribute__((overloadable)) native_powr(float4 v, float4 y) {
1419    float4 v2 = native_log2(v);
1420    v2 = fmax(v2 * y, -125.f);
1421    return native_exp2(v2);
1422}
1423
1424extern double __attribute__((overloadable)) min(double v1, double v2) {
1425    return v1 < v2 ? v1 : v2;
1426}
1427
1428extern double2 __attribute__((overloadable)) min(double2 v1, double2 v2) {
1429    double2 r;
1430    r.x = v1.x < v2.x ? v1.x : v2.x;
1431    r.y = v1.y < v2.y ? v1.y : v2.y;
1432    return r;
1433}
1434
1435extern double3 __attribute__((overloadable)) min(double3 v1, double3 v2) {
1436    double3 r;
1437    r.x = v1.x < v2.x ? v1.x : v2.x;
1438    r.y = v1.y < v2.y ? v1.y : v2.y;
1439    r.z = v1.z < v2.z ? v1.z : v2.z;
1440    return r;
1441}
1442
1443extern double4 __attribute__((overloadable)) min(double4 v1, double4 v2) {
1444    double4 r;
1445    r.x = v1.x < v2.x ? v1.x : v2.x;
1446    r.y = v1.y < v2.y ? v1.y : v2.y;
1447    r.z = v1.z < v2.z ? v1.z : v2.z;
1448    r.w = v1.w < v2.w ? v1.w : v2.w;
1449    return r;
1450}
1451
1452extern long __attribute__((overloadable)) min(long v1, long v2) {
1453    return v1 < v2 ? v1 : v2;
1454}
1455extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
1456    long2 r;
1457    r.x = v1.x < v2.x ? v1.x : v2.x;
1458    r.y = v1.y < v2.y ? v1.y : v2.y;
1459    return r;
1460}
1461extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
1462    long3 r;
1463    r.x = v1.x < v2.x ? v1.x : v2.x;
1464    r.y = v1.y < v2.y ? v1.y : v2.y;
1465    r.z = v1.z < v2.z ? v1.z : v2.z;
1466    return r;
1467}
1468extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
1469    long4 r;
1470    r.x = v1.x < v2.x ? v1.x : v2.x;
1471    r.y = v1.y < v2.y ? v1.y : v2.y;
1472    r.z = v1.z < v2.z ? v1.z : v2.z;
1473    r.w = v1.w < v2.w ? v1.w : v2.w;
1474    return r;
1475}
1476
1477extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
1478    return v1 < v2 ? v1 : v2;
1479}
1480extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
1481    ulong2 r;
1482    r.x = v1.x < v2.x ? v1.x : v2.x;
1483    r.y = v1.y < v2.y ? v1.y : v2.y;
1484    return r;
1485}
1486extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
1487    ulong3 r;
1488    r.x = v1.x < v2.x ? v1.x : v2.x;
1489    r.y = v1.y < v2.y ? v1.y : v2.y;
1490    r.z = v1.z < v2.z ? v1.z : v2.z;
1491    return r;
1492}
1493extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
1494    ulong4 r;
1495    r.x = v1.x < v2.x ? v1.x : v2.x;
1496    r.y = v1.y < v2.y ? v1.y : v2.y;
1497    r.z = v1.z < v2.z ? v1.z : v2.z;
1498    r.w = v1.w < v2.w ? v1.w : v2.w;
1499    return r;
1500}
1501
1502extern double __attribute__((overloadable)) max(double v1, double v2) {
1503    return v1 > v2 ? v1 : v2;
1504}
1505
1506extern double2 __attribute__((overloadable)) max(double2 v1, double2 v2) {
1507    double2 r;
1508    r.x = v1.x > v2.x ? v1.x : v2.x;
1509    r.y = v1.y > v2.y ? v1.y : v2.y;
1510    return r;
1511}
1512
1513extern double3 __attribute__((overloadable)) max(double3 v1, double3 v2) {
1514    double3 r;
1515    r.x = v1.x > v2.x ? v1.x : v2.x;
1516    r.y = v1.y > v2.y ? v1.y : v2.y;
1517    r.z = v1.z > v2.z ? v1.z : v2.z;
1518    return r;
1519}
1520
1521extern double4 __attribute__((overloadable)) max(double4 v1, double4 v2) {
1522    double4 r;
1523    r.x = v1.x > v2.x ? v1.x : v2.x;
1524    r.y = v1.y > v2.y ? v1.y : v2.y;
1525    r.z = v1.z > v2.z ? v1.z : v2.z;
1526    r.w = v1.w > v2.w ? v1.w : v2.w;
1527    return r;
1528}
1529
1530extern long __attribute__((overloadable)) max(long v1, long v2) {
1531    return v1 > v2 ? v1 : v2;
1532}
1533extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
1534    long2 r;
1535    r.x = v1.x > v2.x ? v1.x : v2.x;
1536    r.y = v1.y > v2.y ? v1.y : v2.y;
1537    return r;
1538}
1539extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
1540    long3 r;
1541    r.x = v1.x > v2.x ? v1.x : v2.x;
1542    r.y = v1.y > v2.y ? v1.y : v2.y;
1543    r.z = v1.z > v2.z ? v1.z : v2.z;
1544    return r;
1545}
1546extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
1547    long4 r;
1548    r.x = v1.x > v2.x ? v1.x : v2.x;
1549    r.y = v1.y > v2.y ? v1.y : v2.y;
1550    r.z = v1.z > v2.z ? v1.z : v2.z;
1551    r.w = v1.w > v2.w ? v1.w : v2.w;
1552    return r;
1553}
1554
1555extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
1556    return v1 > v2 ? v1 : v2;
1557}
1558extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
1559    ulong2 r;
1560    r.x = v1.x > v2.x ? v1.x : v2.x;
1561    r.y = v1.y > v2.y ? v1.y : v2.y;
1562    return r;
1563}
1564extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
1565    ulong3 r;
1566    r.x = v1.x > v2.x ? v1.x : v2.x;
1567    r.y = v1.y > v2.y ? v1.y : v2.y;
1568    r.z = v1.z > v2.z ? v1.z : v2.z;
1569    return r;
1570}
1571extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
1572    ulong4 r;
1573    r.x = v1.x > v2.x ? v1.x : v2.x;
1574    r.y = v1.y > v2.y ? v1.y : v2.y;
1575    r.z = v1.z > v2.z ? v1.z : v2.z;
1576    r.w = v1.w > v2.w ? v1.w : v2.w;
1577    return r;
1578}
1579
/*
 * THUNK_NATIVE_* helpers: each defines native_<fn> for float, float2, float3
 * and float4 as a direct call to the regular <fn> overload of the same
 * signature, i.e. these native_ variants are plain aliases with no separate
 * fast path. The macros are #undef'd after the instantiation list.
 */

// One float/vector argument: native_fn(v) -> fn(v).
#define THUNK_NATIVE_F(fn) \
    float __attribute__((overloadable)) native_##fn(float v) { return fn(v);} \
    float2 __attribute__((overloadable)) native_##fn(float2 v) { return fn(v);} \
    float3 __attribute__((overloadable)) native_##fn(float3 v) { return fn(v);} \
    float4 __attribute__((overloadable)) native_##fn(float4 v) { return fn(v);}

// Two float/vector arguments of the same type: native_fn(v1, v2) -> fn(v1, v2).
#define THUNK_NATIVE_F_F(fn) \
    float __attribute__((overloadable)) native_##fn(float v1, float v2) { return fn(v1, v2);} \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 v2) { return fn(v1, v2);} \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 v2) { return fn(v1, v2);} \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 v2) { return fn(v1, v2);}

// Float/vector argument plus a pointer to the same type; the pointer is
// passed through untouched.
#define THUNK_NATIVE_F_FP(fn) \
    float __attribute__((overloadable)) native_##fn(float v1, float *v2) { return fn(v1, v2);} \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, float2 *v2) { return fn(v1, v2);} \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, float3 *v2) { return fn(v1, v2);} \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, float4 *v2) { return fn(v1, v2);}

// Float/vector argument plus an int/int-vector argument of the same width.
#define THUNK_NATIVE_F_I(fn) \
    float __attribute__((overloadable)) native_##fn(float v1, int v2) { return fn(v1, v2);} \
    float2 __attribute__((overloadable)) native_##fn(float2 v1, int2 v2) { return fn(v1, v2);} \
    float3 __attribute__((overloadable)) native_##fn(float3 v1, int3 v2) { return fn(v1, v2);} \
    float4 __attribute__((overloadable)) native_##fn(float4 v1, int4 v2) { return fn(v1, v2);}
1603
// native_* entry points that simply forward to the regular implementation
// of the same function. The macro suffix selects the argument shape:
// _F one float argument, _F_F two, _F_I float + int, _F_FP float + pointer.
THUNK_NATIVE_F(acos)
THUNK_NATIVE_F(acosh)
THUNK_NATIVE_F(acospi)
THUNK_NATIVE_F(asin)
THUNK_NATIVE_F(asinh)
THUNK_NATIVE_F(asinpi)
THUNK_NATIVE_F(atan)
THUNK_NATIVE_F_F(atan2)
THUNK_NATIVE_F(atanh)
THUNK_NATIVE_F(atanpi)
THUNK_NATIVE_F_F(atan2pi)
THUNK_NATIVE_F(cbrt)
THUNK_NATIVE_F(cos)
THUNK_NATIVE_F(cosh)
THUNK_NATIVE_F(cospi)
THUNK_NATIVE_F(expm1)
THUNK_NATIVE_F_F(hypot)
THUNK_NATIVE_F(log1p)
THUNK_NATIVE_F_I(rootn)
THUNK_NATIVE_F(rsqrt)
THUNK_NATIVE_F(sqrt)
THUNK_NATIVE_F(sin)
THUNK_NATIVE_F_FP(sincos)
THUNK_NATIVE_F(sinh)
THUNK_NATIVE_F(sinpi)
THUNK_NATIVE_F(tan)
THUNK_NATIVE_F(tanh)
THUNK_NATIVE_F(tanpi)

// The thunk generators are not needed past this point.
#undef THUNK_NATIVE_F
#undef THUNK_NATIVE_F_F
#undef THUNK_NATIVE_F_I
#undef THUNK_NATIVE_F_FP
1637
1638float __attribute__((overloadable)) native_normalize(float v) { return fast_normalize(v);}
1639float2 __attribute__((overloadable)) native_normalize(float2 v) { return fast_normalize(v);}
1640float3 __attribute__((overloadable)) native_normalize(float3 v) { return fast_normalize(v);}
1641float4 __attribute__((overloadable)) native_normalize(float4 v) { return fast_normalize(v);}
1642
1643float __attribute__((overloadable)) native_distance(float v1, float v2) { return fast_distance(v1, v2);}
1644float __attribute__((overloadable)) native_distance(float2 v1, float2 v2) { return fast_distance(v1, v2);}
1645float __attribute__((overloadable)) native_distance(float3 v1, float3 v2) { return fast_distance(v1, v2);}
1646float __attribute__((overloadable)) native_distance(float4 v1, float4 v2) { return fast_distance(v1, v2);}
1647
1648float __attribute__((overloadable)) native_length(float v) { return fast_length(v);}
1649float __attribute__((overloadable)) native_length(float2 v) { return fast_length(v);}
1650float __attribute__((overloadable)) native_length(float3 v) { return fast_length(v);}
1651float __attribute__((overloadable)) native_length(float4 v) { return fast_length(v);}
1652
1653float __attribute__((overloadable)) native_divide(float v1, float v2) { return v1 / v2;}
1654float2 __attribute__((overloadable)) native_divide(float2 v1, float2 v2) { return v1 / v2;}
1655float3 __attribute__((overloadable)) native_divide(float3 v1, float3 v2) { return v1 / v2;}
1656float4 __attribute__((overloadable)) native_divide(float4 v1, float4 v2) { return v1 / v2;}
1657
1658float __attribute__((overloadable)) native_recip(float v) { return 1.f / v;}
1659float2 __attribute__((overloadable)) native_recip(float2 v) { return ((float2)1.f) / v;}
1660float3 __attribute__((overloadable)) native_recip(float3 v) { return ((float3)1.f) / v;}
1661float4 __attribute__((overloadable)) native_recip(float4 v) { return ((float4)1.f) / v;}
1662
1663
1664
1665
1666
/* Retire the function-generator helper macros so that none of them leaks
 * into the half-precision section that follows.
 */

/* Generators named FN_FUNC_* */
#undef FN_FUNC_FN
#undef FN_FUNC_FN_F
#undef FN_FUNC_FN_I
#undef FN_FUNC_FN_FN
#undef FN_FUNC_FN_IN
#undef FN_FUNC_FN_PFN
#undef FN_FUNC_FN_PIN
#undef FN_FUNC_FN_FN_FN
#undef FN_FUNC_FN_FN_PIN

/* Generators named IN_FUNC_* / UIN_FUNC_* */
#undef IN_FUNC_FN
#undef IN_FUNC_IN
#undef UIN_FUNC_IN
#undef IN_FUNC_IN_IN_BODY

/* Generators named XN_FUNC_* */
#undef XN_FUNC_YN
#undef XN_FUNC_XN_XN_BODY
1682
/* Raw 16-bit pattern of +infinity in IEEE 754 half precision: sign bit
 * clear, all five exponent bits set, mantissa zero.
 */
static const unsigned short kHalfPositiveInfinity = 0x7c00;
1684
/* Generate the half-precision overloads
 *     HN fn(HN)
 * for HN = half, half2, half3 and half4.  Each overload widens its argument
 * to float (same vector width), calls the float overload of fn and narrows
 * the result back to half.
 */
#define HN_FUNC_HN(fn) \
extern half __attribute__((overloadable)) fn(half h) { \
    float wide = (float) h; \
    return (half) fn(wide); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v) { \
    float2 wide = convert_float2(v); \
    return convert_half2(fn(wide)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v) { \
    float3 wide = convert_float3(v); \
    return convert_half3(fn(wide)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v) { \
    float4 wide = convert_float4(v); \
    return convert_half4(fn(wide)); \
}
1702
/* Generate the half-precision overloads
 *     HN fn(HN, HN)
 * for HN = half, half2, half3 and half4.  Both operands are widened to
 * float (same vector width), the float overload of fn is called, and the
 * result is narrowed back to half.
 */
#define HN_FUNC_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2) { \
    float f1 = (float) h1; \
    float f2 = (float) h2; \
    return (half) fn(f1, f2); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) { \
    float2 f1 = convert_float2(v1); \
    float2 f2 = convert_float2(v2); \
    return convert_half2(fn(f1, f2)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) { \
    float3 f1 = convert_float3(v1); \
    float3 f2 = convert_float3(v2); \
    return convert_half3(fn(f1, f2)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) { \
    float4 f1 = convert_float4(v1); \
    float4 f2 = convert_float4(v2); \
    return convert_half4(fn(f1, f2)); \
}
1723
/* Generate the half-precision overloads
 *     HN fn(HN, half)
 * for HN = half2, half3 and half4 (there is no scalar variant here).  The
 * vector operand is widened to the float vector of the same width, the
 * scalar operand to float, and the result of the float overload of fn is
 * narrowed back to half.
 */
#define HN_FUNC_HN_H(fn) \
extern half2 __attribute__((overloadable)) fn(half2 v1, half v2) { \
    float2 wide = convert_float2(v1); \
    float scalar = (float) v2; \
    return convert_half2(fn(wide, scalar)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half v2) { \
    float3 wide = convert_float3(v1); \
    float scalar = (float) v2; \
    return convert_half3(fn(wide, scalar)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half v2) { \
    float4 wide = convert_float4(v1); \
    float scalar = (float) v2; \
    return convert_half4(fn(wide, scalar)); \
}
1738
/* Generate the half-precision overloads
 *     HN fn(HN, HN, HN)
 * for HN = half, half2, half3 and half4.  All three operands are widened to
 * float (same vector width), the float overload of fn is called, and the
 * result is narrowed back to half.
 */
#define HN_FUNC_HN_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2, half h3) { \
    float f1 = (float) h1; \
    float f2 = (float) h2; \
    float f3 = (float) h3; \
    return (half) fn(f1, f2, f3); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2, half2 v3) { \
    float2 f1 = convert_float2(v1); \
    float2 f2 = convert_float2(v2); \
    float2 f3 = convert_float2(v3); \
    return convert_half2(fn(f1, f2, f3)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2, half3 v3) { \
    float3 f1 = convert_float3(v1); \
    float3 f2 = convert_float3(v2); \
    float3 f3 = convert_float3(v3); \
    return convert_half3(fn(f1, f2, f3)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2, half4 v3) { \
    float4 f1 = convert_float4(v1); \
    float4 f2 = convert_float4(v2); \
    float4 f3 = convert_float4(v3); \
    return convert_half4(fn(f1, f2, f3)); \
}
1762
/* Generate the half-precision overloads
 *     HN fn(HN, IN)
 * for HN = half, half2, half3, half4 and IN the int type of the same width.
 * The half operand is widened to float, the integer operand is passed
 * through untouched, and the result of the float overload of fn is narrowed
 * back to half.
 */
#define HN_FUNC_HN_IN(fn) \
extern half __attribute__((overloadable)) fn(half h1, int v) { \
    float wide = (float) h1; \
    return (half) fn(wide, v); \
} \
extern half2 __attribute__((overloadable)) fn(half2 v1, int2 v2) { \
    float2 wide = convert_float2(v1); \
    return convert_half2(fn(wide, v2)); \
} \
extern half3 __attribute__((overloadable)) fn(half3 v1, int3 v2) { \
    float3 wide = convert_float3(v1); \
    return convert_half3(fn(wide, v2)); \
} \
extern half4 __attribute__((overloadable)) fn(half4 v1, int4 v2) { \
    float4 wide = convert_float4(v1); \
    return convert_half4(fn(wide, v2)); \
}
1781
/* Generate the half-precision overloads
 *     half fn(HN)
 * for HN = half, half2, half3 and half4.  The argument is widened to float
 * (same vector width) and handed to the float overload of fn.  The scalar
 * overload casts the result to half explicitly; the vector overloads rely
 * on the implicit conversion performed by the return statement.
 */
#define H_FUNC_HN(fn) \
extern half __attribute__((overloadable)) fn(half h) { \
    float wide = (float) h; \
    return (half) fn(wide); \
} \
extern half __attribute__((overloadable)) fn(half2 v) { \
    float2 wide = convert_float2(v); \
    return fn(wide); \
} \
extern half __attribute__((overloadable)) fn(half3 v) { \
    float3 wide = convert_float3(v); \
    return fn(wide); \
} \
extern half __attribute__((overloadable)) fn(half4 v) { \
    float4 wide = convert_float4(v); \
    return fn(wide); \
}
1799
/* Generate the half-precision overloads
 *     half fn(HN, HN)
 * for HN = half, half2, half3 and half4.  Both operands are widened to
 * float (same vector width) and handed to the float overload of fn.  The
 * scalar overload casts the result to half explicitly; the vector overloads
 * rely on the implicit conversion performed by the return statement.
 */
#define H_FUNC_HN_HN(fn) \
extern half __attribute__((overloadable)) fn(half h1, half h2) { \
    float f1 = (float) h1; \
    float f2 = (float) h2; \
    return (half) fn(f1, f2); \
} \
extern half __attribute__((overloadable)) fn(half2 v1, half2 v2) { \
    float2 f1 = convert_float2(v1); \
    float2 f2 = convert_float2(v2); \
    return fn(f1, f2); \
} \
extern half __attribute__((overloadable)) fn(half3 v1, half3 v2) { \
    float3 f1 = convert_float3(v1); \
    float3 f2 = convert_float3(v2); \
    return fn(f1, f2); \
} \
extern half __attribute__((overloadable)) fn(half4 v1, half4 v2) { \
    float4 f1 = convert_float4(v1); \
    float4 f2 = convert_float4(v2); \
    return fn(f1, f2); \
}
1817
/* Generate the half vector overloads
 *     HN fnc(HN, HN *)
 * for HN = half2, half3 and half4 on top of the scalar
 *     half fnc(half, half *)
 * of the same name.  The scalar function is called once per component with
 * a local temporary as its output argument; only after all calls have
 * returned are the temporaries copied into *v2.  v2 is written
 * unconditionally (there is no null check).
 */
#define SCALARIZE_HN_FUNC_HN_PHN(fnc) \
extern half2 __attribute__((overloadable)) fnc(half2 v1, half2 *v2) { \
    half outX, outY; \
    half2 result; \
    result.x = fnc(v1.x, &outX); \
    result.y = fnc(v1.y, &outY); \
    v2->x = outX; \
    v2->y = outY; \
    return result; \
} \
extern half3 __attribute__((overloadable)) fnc(half3 v1, half3 *v2) { \
    half outX, outY, outZ; \
    half3 result; \
    result.x = fnc(v1.x, &outX); \
    result.y = fnc(v1.y, &outY); \
    result.z = fnc(v1.z, &outZ); \
    v2->x = outX; \
    v2->y = outY; \
    v2->z = outZ; \
    return result; \
} \
extern half4 __attribute__((overloadable)) fnc(half4 v1, half4 *v2) { \
    half outX, outY, outZ, outW; \
    half4 result; \
    result.x = fnc(v1.x, &outX); \
    result.y = fnc(v1.y, &outY); \
    result.z = fnc(v1.z, &outZ); \
    result.w = fnc(v1.w, &outW); \
    v2->x = outX; \
    v2->y = outY; \
    v2->z = outZ; \
    v2->w = outW; \
    return result; \
}
1852
/* Define f16 functions of the form
 *     HN output = fn(HN input1, HN input2)
 * where HN is a vector half type (half2, half3, half4).  Each overload
 * calls the scalar function of the same name once per component, pairing
 * the matching components of v1 and v2, and assembles the results into a
 * vector.
 *
 * The final line of the macro deliberately has no line continuation, so the
 * definition ends with the closing brace of the half4 overload and cannot
 * absorb whatever is written on the following line.
 */
#define SCALARIZE_HN_FUNC_HN_HN(fn)                                       \
extern half2 __attribute__((overloadable)) fn(half2 v1, half2 v2) {       \
  half2 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  return ret;                                                             \
}                                                                         \
extern half3 __attribute__((overloadable)) fn(half3 v1, half3 v2) {       \
  half3 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  return ret;                                                             \
}                                                                         \
extern half4 __attribute__((overloadable)) fn(half4 v1, half4 v2) {       \
  half4 ret;                                                              \
  ret.x = fn(v1.x, v2.x);                                                 \
  ret.y = fn(v1.y, v2.y);                                                 \
  ret.z = fn(v1.z, v2.z);                                                 \
  ret.w = fn(v1.w, v2.w);                                                 \
  return ret;                                                             \
}
1880
1881HN_FUNC_HN(acos);
1882HN_FUNC_HN(acosh);
1883HN_FUNC_HN(acospi);
1884HN_FUNC_HN(asin);
1885HN_FUNC_HN(asinh);
1886HN_FUNC_HN(asinpi);
1887HN_FUNC_HN(atan);
1888HN_FUNC_HN(atanh);
1889HN_FUNC_HN(atanpi);
1890HN_FUNC_HN_HN(atan2);
1891HN_FUNC_HN_HN(atan2pi);
1892
1893HN_FUNC_HN(cbrt);
1894HN_FUNC_HN(ceil);
1895
1896extern half __attribute__((overloadable)) copysign(half x, half y);
1897SCALARIZE_HN_FUNC_HN_HN(copysign);
1898
1899HN_FUNC_HN(cos);
1900HN_FUNC_HN(cosh);
1901HN_FUNC_HN(cospi);
1902
1903extern half3 __attribute__((overloadable)) cross(half3 lhs, half3 rhs) {
1904    half3 r;
1905    r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1906    r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1907    r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1908    return r;
1909}
1910
1911extern half4 __attribute__((overloadable)) cross(half4 lhs, half4 rhs) {
1912    half4 r;
1913    r.x = lhs.y * rhs.z  - lhs.z * rhs.y;
1914    r.y = lhs.z * rhs.x  - lhs.x * rhs.z;
1915    r.z = lhs.x * rhs.y  - lhs.y * rhs.x;
1916    r.w = 0.f;
1917    return r;
1918}
1919
1920HN_FUNC_HN(degrees);
1921H_FUNC_HN_HN(distance);
1922H_FUNC_HN_HN(dot);
1923
1924HN_FUNC_HN(erf);
1925HN_FUNC_HN(erfc);
1926HN_FUNC_HN(exp);
1927HN_FUNC_HN(exp10);
1928HN_FUNC_HN(exp2);
1929HN_FUNC_HN(expm1);
1930
1931HN_FUNC_HN(fabs);
1932HN_FUNC_HN_HN(fdim);
1933HN_FUNC_HN(floor);
1934HN_FUNC_HN_HN_HN(fma);
1935HN_FUNC_HN_HN(fmax);
1936HN_FUNC_HN_H(fmax);
1937HN_FUNC_HN_HN(fmin);
1938HN_FUNC_HN_H(fmin);
1939HN_FUNC_HN_HN(fmod);
1940
1941extern half __attribute__((overloadable)) fract(half v, half *iptr) {
1942    // maxLessThanOne = 0.99951171875, the largest value < 1.0
1943    half maxLessThanOne;
1944    SET_HALF_WORD(maxLessThanOne, 0x3bff);
1945
1946    int i = (int) floor(v);
1947    if (iptr) {
1948        *iptr = i;
1949    }
1950    // return v - floor(v), if strictly less than one
1951    return fmin(v - i, maxLessThanOne);
1952}
1953
1954SCALARIZE_HN_FUNC_HN_PHN(fract);
1955
1956extern half __attribute__((const, overloadable)) fract(half v) {
1957    half unused;
1958    return fract(v, &unused);
1959}
1960
1961extern half2 __attribute__((const, overloadable)) fract(half2 v) {
1962    half2 unused;
1963    return fract(v, &unused);
1964}
1965
1966extern half3 __attribute__((const, overloadable)) fract(half3 v) {
1967    half3 unused;
1968    return fract(v, &unused);
1969}
1970
1971extern half4 __attribute__((const, overloadable)) fract(half4 v) {
1972    half4 unused;
1973    return fract(v, &unused);
1974}
1975
1976extern half __attribute__((overloadable)) frexp(half x, int *eptr);
1977
1978extern half2 __attribute__((overloadable)) frexp(half2 v1, int2 *eptr) {
1979    half2 ret;
1980    int e[2];
1981    ret.x = frexp(v1.x, &e[0]);
1982    ret.y = frexp(v1.y, &e[1]);
1983    eptr->x = e[0];
1984    eptr->y = e[1];
1985    return ret;
1986}
1987
1988extern half3 __attribute__((overloadable)) frexp(half3 v1, int3 *eptr) {
1989    half3 ret;
1990    int e[3];
1991    ret.x = frexp(v1.x, &e[0]);
1992    ret.y = frexp(v1.y, &e[1]);
1993    ret.z = frexp(v1.z, &e[2]);
1994    eptr->x = e[0];
1995    eptr->y = e[1];
1996    eptr->z = e[2];
1997    return ret;
1998}
1999
2000extern half4 __attribute__((overloadable)) frexp(half4 v1, int4 *eptr) {
2001    half4 ret;
2002    int e[4];
2003    ret.x = frexp(v1.x, &e[0]);
2004    ret.y = frexp(v1.y, &e[1]);
2005    ret.z = frexp(v1.z, &e[2]);
2006    ret.w = frexp(v1.w, &e[3]);
2007    eptr->x = e[0];
2008    eptr->y = e[1];
2009    eptr->z = e[2];
2010    eptr->w = e[3];
2011    return ret;
2012}
2013
2014HN_FUNC_HN_HN(hypot);
2015
2016extern int __attribute__((overloadable)) ilogb(half x);
2017
2018extern int2 __attribute__((overloadable)) ilogb(half2 v) {
2019    int2 ret;
2020    ret.x = ilogb(v.x);
2021    ret.y = ilogb(v.y);
2022    return ret;
2023}
2024extern int3 __attribute__((overloadable)) ilogb(half3 v) {
2025    int3 ret;
2026    ret.x = ilogb(v.x);
2027    ret.y = ilogb(v.y);
2028    ret.z = ilogb(v.z);
2029    return ret;
2030}
2031extern int4 __attribute__((overloadable)) ilogb(half4 v) {
2032    int4 ret;
2033    ret.x = ilogb(v.x);
2034    ret.y = ilogb(v.y);
2035    ret.z = ilogb(v.z);
2036    ret.w = ilogb(v.w);
2037    return ret;
2038}
2039
2040HN_FUNC_HN_IN(ldexp);
2041extern half2 __attribute__((overloadable)) ldexp(half2 v, int exponent) {
2042    return convert_half2(ldexp(convert_float2(v), exponent));
2043}
2044extern half3 __attribute__((overloadable)) ldexp(half3 v, int exponent) {
2045    return convert_half3(ldexp(convert_float3(v), exponent));
2046}
2047extern half4 __attribute__((overloadable)) ldexp(half4 v, int exponent) {
2048    return convert_half4(ldexp(convert_float4(v), exponent));
2049}
2050
2051H_FUNC_HN(length);
2052HN_FUNC_HN(lgamma);
2053
2054extern half __attribute__((overloadable)) lgamma(half h, int *signp) {
2055    return (half) lgamma((float) h, signp);
2056}
2057extern half2 __attribute__((overloadable)) lgamma(half2 v, int2 *signp) {
2058    return convert_half2(lgamma(convert_float2(v), signp));
2059}
2060extern half3 __attribute__((overloadable)) lgamma(half3 v, int3 *signp) {
2061    return convert_half3(lgamma(convert_float3(v), signp));
2062}
2063extern half4 __attribute__((overloadable)) lgamma(half4 v, int4 *signp) {
2064    return convert_half4(lgamma(convert_float4(v), signp));
2065}
2066
2067HN_FUNC_HN(log);
2068HN_FUNC_HN(log10);
2069HN_FUNC_HN(log1p);
2070HN_FUNC_HN(log2);
2071HN_FUNC_HN(logb);
2072
2073HN_FUNC_HN_HN_HN(mad);
2074HN_FUNC_HN_HN(max);
2075HN_FUNC_HN_H(max); // TODO can this be arch-specific similar to _Z3maxDv2_ff?
2076HN_FUNC_HN_HN(min);
2077HN_FUNC_HN_H(min); // TODO can this be arch-specific similar to _Z3minDv2_ff?
2078
2079extern half __attribute__((overloadable)) mix(half start, half stop, half amount) {
2080    return start + (stop - start) * amount;
2081}
2082extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half2 amount) {
2083    return start + (stop - start) * amount;
2084}
2085extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half3 amount) {
2086    return start + (stop - start) * amount;
2087}
2088extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half4 amount) {
2089    return start + (stop - start) * amount;
2090}
2091extern half2 __attribute__((overloadable)) mix(half2 start, half2 stop, half amount) {
2092    return start + (stop - start) * amount;
2093}
2094extern half3 __attribute__((overloadable)) mix(half3 start, half3 stop, half amount) {
2095    return start + (stop - start) * amount;
2096}
2097extern half4 __attribute__((overloadable)) mix(half4 start, half4 stop, half amount) {
2098    return start + (stop - start) * amount;
2099}
2100
2101extern half __attribute__((overloadable)) modf(half x, half *iptr);
2102SCALARIZE_HN_FUNC_HN_PHN(modf);
2103
2104half __attribute__((overloadable)) nan_half() {
2105  unsigned short nan_short = kHalfPositiveInfinity | 0x0200;
2106  half nan;
2107  SET_HALF_WORD(nan, nan_short);
2108  return nan;
2109}
2110
2111HN_FUNC_HN(normalize);
2112
2113extern half __attribute__((overloadable)) nextafter(half x, half y);
2114SCALARIZE_HN_FUNC_HN_HN(nextafter);
2115
2116HN_FUNC_HN_HN(pow);
2117HN_FUNC_HN_IN(pown);
2118HN_FUNC_HN_HN(powr);
2119HN_FUNC_HN(radians);
2120HN_FUNC_HN_HN(remainder);
2121
2122extern half __attribute__((overloadable)) remquo(half n, half d, int *quo) {
2123    return (float) remquo((float) n, (float) d, quo);
2124}
2125extern half2 __attribute__((overloadable)) remquo(half2 n, half2 d, int2 *quo) {
2126    return convert_half2(remquo(convert_float2(d), convert_float2(n), quo));
2127}
2128extern half3 __attribute__((overloadable)) remquo(half3 n, half3 d, int3 *quo) {
2129    return convert_half3(remquo(convert_float3(d), convert_float3(n), quo));
2130}
2131extern half4 __attribute__((overloadable)) remquo(half4 n, half4 d, int4 *quo) {
2132    return convert_half4(remquo(convert_float4(d), convert_float4(n), quo));
2133}
2134
2135HN_FUNC_HN(rint);
2136HN_FUNC_HN_IN(rootn);
2137HN_FUNC_HN(round);
2138HN_FUNC_HN(rsqrt);
2139
2140extern half __attribute__((overloadable)) sign(half h) {
2141    if (h > 0) return (half) 1.f;
2142    if (h < 0) return (half) -1.f;
2143    return h;
2144}
2145extern half2 __attribute__((overloadable)) sign(half2 v) {
2146    half2 ret;
2147    ret.x = sign(v.x);
2148    ret.y = sign(v.y);
2149    return ret;
2150}
2151extern half3 __attribute__((overloadable)) sign(half3 v) {
2152    half3 ret;
2153    ret.x = sign(v.x);
2154    ret.y = sign(v.y);
2155    ret.z = sign(v.z);
2156    return ret;
2157}
2158extern half4 __attribute__((overloadable)) sign(half4 v) {
2159    half4 ret;
2160    ret.x = sign(v.x);
2161    ret.y = sign(v.y);
2162    ret.z = sign(v.z);
2163    ret.w = sign(v.w);
2164    return ret;
2165}
2166
2167HN_FUNC_HN(sin);
2168
2169extern half __attribute__((overloadable)) sincos(half v, half *cosptr) {
2170    *cosptr = cos(v);
2171    return sin(v);
2172}
2173// TODO verify if LLVM eliminates the duplicate convert_float2
2174extern half2 __attribute__((overloadable)) sincos(half2 v, half2 *cosptr) {
2175    *cosptr = cos(v);
2176    return sin(v);
2177}
2178extern half3 __attribute__((overloadable)) sincos(half3 v, half3 *cosptr) {
2179    *cosptr = cos(v);
2180    return sin(v);
2181}
2182extern half4 __attribute__((overloadable)) sincos(half4 v, half4 *cosptr) {
2183    *cosptr = cos(v);
2184    return sin(v);
2185}
2186
2187HN_FUNC_HN(sinh);
2188HN_FUNC_HN(sinpi);
2189HN_FUNC_HN(sqrt);
2190
2191extern half __attribute__((overloadable)) step(half edge, half v) {
2192    return (v < edge) ? 0.f : 1.f;
2193}
2194extern half2 __attribute__((overloadable)) step(half2 edge, half2 v) {
2195    half2 r;
2196    r.x = (v.x < edge.x) ? 0.f : 1.f;
2197    r.y = (v.y < edge.y) ? 0.f : 1.f;
2198    return r;
2199}
2200extern half3 __attribute__((overloadable)) step(half3 edge, half3 v) {
2201    half3 r;
2202    r.x = (v.x < edge.x) ? 0.f : 1.f;
2203    r.y = (v.y < edge.y) ? 0.f : 1.f;
2204    r.z = (v.z < edge.z) ? 0.f : 1.f;
2205    return r;
2206}
2207extern half4 __attribute__((overloadable)) step(half4 edge, half4 v) {
2208    half4 r;
2209    r.x = (v.x < edge.x) ? 0.f : 1.f;
2210    r.y = (v.y < edge.y) ? 0.f : 1.f;
2211    r.z = (v.z < edge.z) ? 0.f : 1.f;
2212    r.w = (v.w < edge.w) ? 0.f : 1.f;
2213    return r;
2214}
2215extern half2 __attribute__((overloadable)) step(half2 edge, half v) {
2216    half2 r;
2217    r.x = (v < edge.x) ? 0.f : 1.f;
2218    r.y = (v < edge.y) ? 0.f : 1.f;
2219    return r;
2220}
2221extern half3 __attribute__((overloadable)) step(half3 edge, half v) {
2222    half3 r;
2223    r.x = (v < edge.x) ? 0.f : 1.f;
2224    r.y = (v < edge.y) ? 0.f : 1.f;
2225    r.z = (v < edge.z) ? 0.f : 1.f;
2226    return r;
2227}
2228extern half4 __attribute__((overloadable)) step(half4 edge, half v) {
2229    half4 r;
2230    r.x = (v < edge.x) ? 0.f : 1.f;
2231    r.y = (v < edge.y) ? 0.f : 1.f;
2232    r.z = (v < edge.z) ? 0.f : 1.f;
2233    r.w = (v < edge.w) ? 0.f : 1.f;
2234    return r;
2235}
2236extern half2 __attribute__((overloadable)) step(half edge, half2 v) {
2237    half2 r;
2238    r.x = (v.x < edge) ? 0.f : 1.f;
2239    r.y = (v.y < edge) ? 0.f : 1.f;
2240    return r;
2241}
2242extern half3 __attribute__((overloadable)) step(half edge, half3 v) {
2243    half3 r;
2244    r.x = (v.x < edge) ? 0.f : 1.f;
2245    r.y = (v.y < edge) ? 0.f : 1.f;
2246    r.z = (v.z < edge) ? 0.f : 1.f;
2247    return r;
2248}
2249extern half4 __attribute__((overloadable)) step(half edge, half4 v) {
2250    half4 r;
2251    r.x = (v.x < edge) ? 0.f : 1.f;
2252    r.y = (v.y < edge) ? 0.f : 1.f;
2253    r.z = (v.z < edge) ? 0.f : 1.f;
2254    r.w = (v.w < edge) ? 0.f : 1.f;
2255    return r;
2256}
2257
2258HN_FUNC_HN(tan);
2259HN_FUNC_HN(tanh);
2260HN_FUNC_HN(tanpi);
2261HN_FUNC_HN(tgamma);
2262HN_FUNC_HN(trunc); // TODO: rethink: needs half-specific implementation?
2263
2264HN_FUNC_HN(native_acos);
2265HN_FUNC_HN(native_acosh);
2266HN_FUNC_HN(native_acospi);
2267HN_FUNC_HN(native_asin);
2268HN_FUNC_HN(native_asinh);
2269HN_FUNC_HN(native_asinpi);
2270HN_FUNC_HN(native_atan);
2271HN_FUNC_HN(native_atanh);
2272HN_FUNC_HN(native_atanpi);
2273HN_FUNC_HN_HN(native_atan2);
2274HN_FUNC_HN_HN(native_atan2pi);
2275
2276HN_FUNC_HN(native_cbrt);
2277HN_FUNC_HN(native_cos);
2278HN_FUNC_HN(native_cosh);
2279HN_FUNC_HN(native_cospi);
2280
2281H_FUNC_HN_HN(native_distance);
2282HN_FUNC_HN_HN(native_divide);
2283
2284HN_FUNC_HN(native_exp);
2285HN_FUNC_HN(native_exp10);
2286HN_FUNC_HN(native_exp2);
2287HN_FUNC_HN(native_expm1);
2288
2289HN_FUNC_HN_HN(native_hypot);
2290H_FUNC_HN(native_length);
2291
2292HN_FUNC_HN(native_log);
2293HN_FUNC_HN(native_log10);
2294HN_FUNC_HN(native_log1p);
2295HN_FUNC_HN(native_log2);
2296
2297HN_FUNC_HN(native_normalize);
2298
2299HN_FUNC_HN_HN(native_powr); // TODO are parameter limits different for half?
2300
2301HN_FUNC_HN(native_recip);
2302HN_FUNC_HN_IN(native_rootn);
2303HN_FUNC_HN(native_rsqrt);
2304
2305HN_FUNC_HN(native_sin);
2306
2307extern half __attribute__((overloadable)) native_sincos(half v, half *cosptr) {
2308    return sincos(v, cosptr);
2309}
2310extern half2 __attribute__((overloadable)) native_sincos(half2 v, half2 *cosptr) {
2311    return sincos(v, cosptr);
2312}
2313extern half3 __attribute__((overloadable)) native_sincos(half3 v, half3 *cosptr) {
2314    return sincos(v, cosptr);
2315}
2316extern half4 __attribute__((overloadable)) native_sincos(half4 v, half4 *cosptr) {
2317    return sincos(v, cosptr);
2318}
2319
2320HN_FUNC_HN(native_sinh);
2321HN_FUNC_HN(native_sinpi);
2322HN_FUNC_HN(native_sqrt);
2323
2324HN_FUNC_HN(native_tan);
2325HN_FUNC_HN(native_tanh);
2326HN_FUNC_HN(native_tanpi);
2327
2328#undef HN_FUNC_HN
2329#undef HN_FUNC_HN_HN
2330#undef HN_FUNC_HN_H
2331#undef HN_FUNC_HN_HN_HN
2332#undef HN_FUNC_HN_IN
2333#undef H_FUNC_HN
2334#undef H_FUNC_HN_HN
2335#undef SCALARIZE_HN_FUNC_HN_HN
2336
2337// exports unavailable mathlib functions to compat lib
2338
2339#ifdef RS_COMPATIBILITY_LIB
2340
2341// !!! DANGER !!!
2342// These functions are potentially missing on older Android versions.
2343// Work around the issue by supplying our own variants.
2344// !!! DANGER !!!
2345
2346// The logbl() implementation is taken from the latest bionic/, since
2347// double == long double on Android.
2348extern "C" long double logbl(long double x) { return logb(x); }
2349
2350// __aeabi_idiv0 is a missing function in libcompiler_rt.so, so we just
2351// pick the simplest implementation based on the ARM EABI doc.
2352extern "C" int __aeabi_idiv0(int v) { return v; }
2353
2354#endif // compatibility lib
2355