generic.c revision 5a47020542c52af3e879c1cd67674ca979ff0a18
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rs_types.rsh"
19
20extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
21extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
22extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
23extern float4 __attribute__((overloadable)) convert_float4(uchar4);
24extern float __attribute__((overloadable)) sqrt(float);
25
26/*
27 * CLAMP
28 */
29#define _CLAMP(T) \
30extern T __attribute__((overloadable)) clamp(T amount, T low, T high) {             \
31    return amount < low ? low : (amount > high ? high : amount);                    \
32}                                                                                   \
33                                                                                    \
34extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
35    T##2 r;                                                                         \
36    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
37    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
38    return r;                                                                       \
39}                                                                                   \
40                                                                                    \
41extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
42    T##3 r;                                                                         \
43    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
44    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
45    r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
46    return r;                                                                       \
47}                                                                                   \
48                                                                                    \
49extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
50    T##4 r;                                                                         \
51    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);       \
52    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);       \
53    r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);       \
54    r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);       \
55    return r;                                                                       \
56}                                                                                   \
57                                                                                    \
58extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) {       \
59    T##2 r;                                                                         \
60    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
61    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
62    return r;                                                                       \
63}                                                                                   \
64                                                                                    \
65extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) {       \
66    T##3 r;                                                                         \
67    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
68    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
69    r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
70    return r;                                                                       \
71}                                                                                   \
72                                                                                    \
73extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) {       \
74    T##4 r;                                                                         \
75    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);               \
76    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);               \
77    r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);               \
78    r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);               \
79    return r;                                                                       \
80}
81
82_CLAMP(float);
83_CLAMP(double);
84_CLAMP(char);
85_CLAMP(uchar);
86_CLAMP(short);
87_CLAMP(ushort);
88_CLAMP(int);
89_CLAMP(uint);
90_CLAMP(long);
91_CLAMP(ulong);
92
93#undef _CLAMP
94
95/*
96 * FMAX
97 */
98
/* Returns v1 when v1 > v2, otherwise v2. */
extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
102
103extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
104    float2 r;
105    r.x = v1.x > v2.x ? v1.x : v2.x;
106    r.y = v1.y > v2.y ? v1.y : v2.y;
107    return r;
108}
109
110extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
111    float3 r;
112    r.x = v1.x > v2.x ? v1.x : v2.x;
113    r.y = v1.y > v2.y ? v1.y : v2.y;
114    r.z = v1.z > v2.z ? v1.z : v2.z;
115    return r;
116}
117
118extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
119    float4 r;
120    r.x = v1.x > v2.x ? v1.x : v2.x;
121    r.y = v1.y > v2.y ? v1.y : v2.y;
122    r.z = v1.z > v2.z ? v1.z : v2.z;
123    r.w = v1.w > v2.w ? v1.w : v2.w;
124    return r;
125}
126
127extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
128    float2 r;
129    r.x = v1.x > v2 ? v1.x : v2;
130    r.y = v1.y > v2 ? v1.y : v2;
131    return r;
132}
133
134extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
135    float3 r;
136    r.x = v1.x > v2 ? v1.x : v2;
137    r.y = v1.y > v2 ? v1.y : v2;
138    r.z = v1.z > v2 ? v1.z : v2;
139    return r;
140}
141
142extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
143    float4 r;
144    r.x = v1.x > v2 ? v1.x : v2;
145    r.y = v1.y > v2 ? v1.y : v2;
146    r.z = v1.z > v2 ? v1.z : v2;
147    r.w = v1.w > v2 ? v1.w : v2;
148    return r;
149}
150
/* Returns v1 when v1 < v2, otherwise v2. */
extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
154
155
156/*
157 * FMIN
158 */
159extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
160    float2 r;
161    r.x = v1.x < v2.x ? v1.x : v2.x;
162    r.y = v1.y < v2.y ? v1.y : v2.y;
163    return r;
164}
165
166extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
167    float3 r;
168    r.x = v1.x < v2.x ? v1.x : v2.x;
169    r.y = v1.y < v2.y ? v1.y : v2.y;
170    r.z = v1.z < v2.z ? v1.z : v2.z;
171    return r;
172}
173
174extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
175    float4 r;
176    r.x = v1.x < v2.x ? v1.x : v2.x;
177    r.y = v1.y < v2.y ? v1.y : v2.y;
178    r.z = v1.z < v2.z ? v1.z : v2.z;
179    r.w = v1.w < v2.w ? v1.w : v2.w;
180    return r;
181}
182
183extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
184    float2 r;
185    r.x = v1.x < v2 ? v1.x : v2;
186    r.y = v1.y < v2 ? v1.y : v2;
187    return r;
188}
189
190extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
191    float3 r;
192    r.x = v1.x < v2 ? v1.x : v2;
193    r.y = v1.y < v2 ? v1.y : v2;
194    r.z = v1.z < v2 ? v1.z : v2;
195    return r;
196}
197
198extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
199    float4 r;
200    r.x = v1.x < v2 ? v1.x : v2;
201    r.y = v1.y < v2 ? v1.y : v2;
202    r.z = v1.z < v2 ? v1.z : v2;
203    r.w = v1.w < v2 ? v1.w : v2;
204    return r;
205}
206
207
208/*
209 * MAX
210 */
211
/* Returns the larger of two char values (v2 when they compare equal). */
extern char __attribute__((overloadable)) max(char v1, char v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
215
216extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
217    char2 r;
218    r.x = v1.x > v2.x ? v1.x : v2.x;
219    r.y = v1.y > v2.y ? v1.y : v2.y;
220    return r;
221}
222
223extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
224    char3 r;
225    r.x = v1.x > v2.x ? v1.x : v2.x;
226    r.y = v1.y > v2.y ? v1.y : v2.y;
227    r.z = v1.z > v2.z ? v1.z : v2.z;
228    return r;
229}
230
231extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
232    char4 r;
233    r.x = v1.x > v2.x ? v1.x : v2.x;
234    r.y = v1.y > v2.y ? v1.y : v2.y;
235    r.z = v1.z > v2.z ? v1.z : v2.z;
236    r.w = v1.w > v2.w ? v1.w : v2.w;
237    return r;
238}
239
/* Returns the larger of two short values (v2 when they compare equal). */
extern short __attribute__((overloadable)) max(short v1, short v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
243
244extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
245    short2 r;
246    r.x = v1.x > v2.x ? v1.x : v2.x;
247    r.y = v1.y > v2.y ? v1.y : v2.y;
248    return r;
249}
250
251extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
252    short3 r;
253    r.x = v1.x > v2.x ? v1.x : v2.x;
254    r.y = v1.y > v2.y ? v1.y : v2.y;
255    r.z = v1.z > v2.z ? v1.z : v2.z;
256    return r;
257}
258
259extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
260    short4 r;
261    r.x = v1.x > v2.x ? v1.x : v2.x;
262    r.y = v1.y > v2.y ? v1.y : v2.y;
263    r.z = v1.z > v2.z ? v1.z : v2.z;
264    r.w = v1.w > v2.w ? v1.w : v2.w;
265    return r;
266}
267
/* Returns the larger of two int values (v2 when they compare equal). */
extern int __attribute__((overloadable)) max(int v1, int v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
271
272extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
273    int2 r;
274    r.x = v1.x > v2.x ? v1.x : v2.x;
275    r.y = v1.y > v2.y ? v1.y : v2.y;
276    return r;
277}
278
279extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
280    int3 r;
281    r.x = v1.x > v2.x ? v1.x : v2.x;
282    r.y = v1.y > v2.y ? v1.y : v2.y;
283    r.z = v1.z > v2.z ? v1.z : v2.z;
284    return r;
285}
286
287extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
288    int4 r;
289    r.x = v1.x > v2.x ? v1.x : v2.x;
290    r.y = v1.y > v2.y ? v1.y : v2.y;
291    r.z = v1.z > v2.z ? v1.z : v2.z;
292    r.w = v1.w > v2.w ? v1.w : v2.w;
293    return r;
294}
295
/* Returns the larger of two int64_t values (v2 when they compare equal). */
extern int64_t __attribute__((overloadable)) max(int64_t v1, int64_t v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
299
300extern long2 __attribute__((overloadable)) max(long2 v1, long2 v2) {
301    long2 r;
302    r.x = v1.x > v2.x ? v1.x : v2.x;
303    r.y = v1.y > v2.y ? v1.y : v2.y;
304    return r;
305}
306
307extern long3 __attribute__((overloadable)) max(long3 v1, long3 v2) {
308    long3 r;
309    r.x = v1.x > v2.x ? v1.x : v2.x;
310    r.y = v1.y > v2.y ? v1.y : v2.y;
311    r.z = v1.z > v2.z ? v1.z : v2.z;
312    return r;
313}
314
315extern long4 __attribute__((overloadable)) max(long4 v1, long4 v2) {
316    long4 r;
317    r.x = v1.x > v2.x ? v1.x : v2.x;
318    r.y = v1.y > v2.y ? v1.y : v2.y;
319    r.z = v1.z > v2.z ? v1.z : v2.z;
320    r.w = v1.w > v2.w ? v1.w : v2.w;
321    return r;
322}
323
324extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
325    return v1 > v2 ? v1 : v2;
326}
327
328extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
329    uchar2 r;
330    r.x = v1.x > v2.x ? v1.x : v2.x;
331    r.y = v1.y > v2.y ? v1.y : v2.y;
332    return r;
333}
334
335extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
336    uchar3 r;
337    r.x = v1.x > v2.x ? v1.x : v2.x;
338    r.y = v1.y > v2.y ? v1.y : v2.y;
339    r.z = v1.z > v2.z ? v1.z : v2.z;
340    return r;
341}
342
343extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
344    uchar4 r;
345    r.x = v1.x > v2.x ? v1.x : v2.x;
346    r.y = v1.y > v2.y ? v1.y : v2.y;
347    r.z = v1.z > v2.z ? v1.z : v2.z;
348    r.w = v1.w > v2.w ? v1.w : v2.w;
349    return r;
350}
351
352extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
353    return v1 > v2 ? v1 : v2;
354}
355
356extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
357    ushort2 r;
358    r.x = v1.x > v2.x ? v1.x : v2.x;
359    r.y = v1.y > v2.y ? v1.y : v2.y;
360    return r;
361}
362
363extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
364    ushort3 r;
365    r.x = v1.x > v2.x ? v1.x : v2.x;
366    r.y = v1.y > v2.y ? v1.y : v2.y;
367    r.z = v1.z > v2.z ? v1.z : v2.z;
368    return r;
369}
370
371extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
372    ushort4 r;
373    r.x = v1.x > v2.x ? v1.x : v2.x;
374    r.y = v1.y > v2.y ? v1.y : v2.y;
375    r.z = v1.z > v2.z ? v1.z : v2.z;
376    r.w = v1.w > v2.w ? v1.w : v2.w;
377    return r;
378}
379
380extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
381    return v1 > v2 ? v1 : v2;
382}
383
384extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
385    uint2 r;
386    r.x = v1.x > v2.x ? v1.x : v2.x;
387    r.y = v1.y > v2.y ? v1.y : v2.y;
388    return r;
389}
390
391extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
392    uint3 r;
393    r.x = v1.x > v2.x ? v1.x : v2.x;
394    r.y = v1.y > v2.y ? v1.y : v2.y;
395    r.z = v1.z > v2.z ? v1.z : v2.z;
396    return r;
397}
398
399extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
400    uint4 r;
401    r.x = v1.x > v2.x ? v1.x : v2.x;
402    r.y = v1.y > v2.y ? v1.y : v2.y;
403    r.z = v1.z > v2.z ? v1.z : v2.z;
404    r.w = v1.w > v2.w ? v1.w : v2.w;
405    return r;
406}
407
408extern ulong __attribute__((overloadable)) max(ulong v1, ulong v2) {
409    return v1 > v2 ? v1 : v2;
410}
411
412extern ulong2 __attribute__((overloadable)) max(ulong2 v1, ulong2 v2) {
413    ulong2 r;
414    r.x = v1.x > v2.x ? v1.x : v2.x;
415    r.y = v1.y > v2.y ? v1.y : v2.y;
416    return r;
417}
418
419extern ulong3 __attribute__((overloadable)) max(ulong3 v1, ulong3 v2) {
420    ulong3 r;
421    r.x = v1.x > v2.x ? v1.x : v2.x;
422    r.y = v1.y > v2.y ? v1.y : v2.y;
423    r.z = v1.z > v2.z ? v1.z : v2.z;
424    return r;
425}
426
427extern ulong4 __attribute__((overloadable)) max(ulong4 v1, ulong4 v2) {
428    ulong4 r;
429    r.x = v1.x > v2.x ? v1.x : v2.x;
430    r.y = v1.y > v2.y ? v1.y : v2.y;
431    r.z = v1.z > v2.z ? v1.z : v2.z;
432    r.w = v1.w > v2.w ? v1.w : v2.w;
433    return r;
434}
435
/* max() on float forwards to fmax(). */
extern float __attribute__((overloadable)) max(float v1, float v2) {
    const float larger = fmax(v1, v2);
    return larger;
}
439
440extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
441    return fmax(v1, v2);
442}
443
444extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
445    return fmax(v1, v2);
446}
447
448extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
449    return fmax(v1, v2);
450}
451
452extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
453    return fmax(v1, v2);
454}
455
456extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
457    return fmax(v1, v2);
458}
459
460extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
461    return fmax(v1, v2);
462}
463
464
465/*
466 * MIN
467 */
468
/* Returns the smaller of two int8_t values (v2 when they compare equal). */
extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
472
473extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
474    char2 r;
475    r.x = v1.x < v2.x ? v1.x : v2.x;
476    r.y = v1.y < v2.y ? v1.y : v2.y;
477    return r;
478}
479
480extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
481    char3 r;
482    r.x = v1.x < v2.x ? v1.x : v2.x;
483    r.y = v1.y < v2.y ? v1.y : v2.y;
484    r.z = v1.z < v2.z ? v1.z : v2.z;
485    return r;
486}
487
488extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
489    char4 r;
490    r.x = v1.x < v2.x ? v1.x : v2.x;
491    r.y = v1.y < v2.y ? v1.y : v2.y;
492    r.z = v1.z < v2.z ? v1.z : v2.z;
493    r.w = v1.w < v2.w ? v1.w : v2.w;
494    return r;
495}
496
/* Returns the smaller of two int16_t values (v2 when they compare equal). */
extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
500
501extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
502    short2 r;
503    r.x = v1.x < v2.x ? v1.x : v2.x;
504    r.y = v1.y < v2.y ? v1.y : v2.y;
505    return r;
506}
507
508extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
509    short3 r;
510    r.x = v1.x < v2.x ? v1.x : v2.x;
511    r.y = v1.y < v2.y ? v1.y : v2.y;
512    r.z = v1.z < v2.z ? v1.z : v2.z;
513    return r;
514}
515
516extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
517    short4 r;
518    r.x = v1.x < v2.x ? v1.x : v2.x;
519    r.y = v1.y < v2.y ? v1.y : v2.y;
520    r.z = v1.z < v2.z ? v1.z : v2.z;
521    r.w = v1.w < v2.w ? v1.w : v2.w;
522    return r;
523}
524
/* Returns the smaller of two int32_t values (v2 when they compare equal). */
extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
528
529extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
530    int2 r;
531    r.x = v1.x < v2.x ? v1.x : v2.x;
532    r.y = v1.y < v2.y ? v1.y : v2.y;
533    return r;
534}
535
536extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
537    int3 r;
538    r.x = v1.x < v2.x ? v1.x : v2.x;
539    r.y = v1.y < v2.y ? v1.y : v2.y;
540    r.z = v1.z < v2.z ? v1.z : v2.z;
541    return r;
542}
543
544extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
545    int4 r;
546    r.x = v1.x < v2.x ? v1.x : v2.x;
547    r.y = v1.y < v2.y ? v1.y : v2.y;
548    r.z = v1.z < v2.z ? v1.z : v2.z;
549    r.w = v1.w < v2.w ? v1.w : v2.w;
550    return r;
551}
552
/* Returns the smaller of two int64_t values (v2 when they compare equal). */
extern int64_t __attribute__((overloadable)) min(int64_t v1, int64_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
556
557extern long2 __attribute__((overloadable)) min(long2 v1, long2 v2) {
558    long2 r;
559    r.x = v1.x < v2.x ? v1.x : v2.x;
560    r.y = v1.y < v2.y ? v1.y : v2.y;
561    return r;
562}
563
564extern long3 __attribute__((overloadable)) min(long3 v1, long3 v2) {
565    long3 r;
566    r.x = v1.x < v2.x ? v1.x : v2.x;
567    r.y = v1.y < v2.y ? v1.y : v2.y;
568    r.z = v1.z < v2.z ? v1.z : v2.z;
569    return r;
570}
571
572extern long4 __attribute__((overloadable)) min(long4 v1, long4 v2) {
573    long4 r;
574    r.x = v1.x < v2.x ? v1.x : v2.x;
575    r.y = v1.y < v2.y ? v1.y : v2.y;
576    r.z = v1.z < v2.z ? v1.z : v2.z;
577    r.w = v1.w < v2.w ? v1.w : v2.w;
578    return r;
579}
580
581extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
582    return v1 < v2 ? v1 : v2;
583}
584
585extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
586    uchar2 r;
587    r.x = v1.x < v2.x ? v1.x : v2.x;
588    r.y = v1.y < v2.y ? v1.y : v2.y;
589    return r;
590}
591
592extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
593    uchar3 r;
594    r.x = v1.x < v2.x ? v1.x : v2.x;
595    r.y = v1.y < v2.y ? v1.y : v2.y;
596    r.z = v1.z < v2.z ? v1.z : v2.z;
597    return r;
598}
599
600extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
601    uchar4 r;
602    r.x = v1.x < v2.x ? v1.x : v2.x;
603    r.y = v1.y < v2.y ? v1.y : v2.y;
604    r.z = v1.z < v2.z ? v1.z : v2.z;
605    r.w = v1.w < v2.w ? v1.w : v2.w;
606    return r;
607}
608
609extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
610    return v1 < v2 ? v1 : v2;
611}
612
613extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
614    ushort2 r;
615    r.x = v1.x < v2.x ? v1.x : v2.x;
616    r.y = v1.y < v2.y ? v1.y : v2.y;
617    return r;
618}
619
620extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
621    ushort3 r;
622    r.x = v1.x < v2.x ? v1.x : v2.x;
623    r.y = v1.y < v2.y ? v1.y : v2.y;
624    r.z = v1.z < v2.z ? v1.z : v2.z;
625    return r;
626}
627
628extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
629    ushort4 r;
630    r.x = v1.x < v2.x ? v1.x : v2.x;
631    r.y = v1.y < v2.y ? v1.y : v2.y;
632    r.z = v1.z < v2.z ? v1.z : v2.z;
633    r.w = v1.w < v2.w ? v1.w : v2.w;
634    return r;
635}
636
637extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
638    return v1 < v2 ? v1 : v2;
639}
640
641extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
642    uint2 r;
643    r.x = v1.x < v2.x ? v1.x : v2.x;
644    r.y = v1.y < v2.y ? v1.y : v2.y;
645    return r;
646}
647
648extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
649    uint3 r;
650    r.x = v1.x < v2.x ? v1.x : v2.x;
651    r.y = v1.y < v2.y ? v1.y : v2.y;
652    r.z = v1.z < v2.z ? v1.z : v2.z;
653    return r;
654}
655
656extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
657    uint4 r;
658    r.x = v1.x < v2.x ? v1.x : v2.x;
659    r.y = v1.y < v2.y ? v1.y : v2.y;
660    r.z = v1.z < v2.z ? v1.z : v2.z;
661    r.w = v1.w < v2.w ? v1.w : v2.w;
662    return r;
663}
664
665extern ulong __attribute__((overloadable)) min(ulong v1, ulong v2) {
666    return v1 < v2 ? v1 : v2;
667}
668
669extern ulong2 __attribute__((overloadable)) min(ulong2 v1, ulong2 v2) {
670    ulong2 r;
671    r.x = v1.x < v2.x ? v1.x : v2.x;
672    r.y = v1.y < v2.y ? v1.y : v2.y;
673    return r;
674}
675
676extern ulong3 __attribute__((overloadable)) min(ulong3 v1, ulong3 v2) {
677    ulong3 r;
678    r.x = v1.x < v2.x ? v1.x : v2.x;
679    r.y = v1.y < v2.y ? v1.y : v2.y;
680    r.z = v1.z < v2.z ? v1.z : v2.z;
681    return r;
682}
683
684extern ulong4 __attribute__((overloadable)) min(ulong4 v1, ulong4 v2) {
685    ulong4 r;
686    r.x = v1.x < v2.x ? v1.x : v2.x;
687    r.y = v1.y < v2.y ? v1.y : v2.y;
688    r.z = v1.z < v2.z ? v1.z : v2.z;
689    r.w = v1.w < v2.w ? v1.w : v2.w;
690    return r;
691}
692
/* min() on float forwards to fmin(). */
extern float __attribute__((overloadable)) min(float v1, float v2) {
    const float smaller = fmin(v1, v2);
    return smaller;
}
696
697extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
698    return fmin(v1, v2);
699}
700
701extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
702    return fmin(v1, v2);
703}
704
705extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
706    return fmin(v1, v2);
707}
708
709extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
710    return fmin(v1, v2);
711}
712
713extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
714    return fmin(v1, v2);
715}
716
717extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
718    return fmin(v1, v2);
719}
720
721/*
722 * YUV
723 */
724
725extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
726    short Y = ((short)y) - 16;
727    short U = ((short)u) - 128;
728    short V = ((short)v) - 128;
729
730    short4 p;
731    p.r = (Y * 298 + V * 409 + 128) >> 8;
732    p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
733    p.b = (Y * 298 + U * 516 + 128) >> 8;
734    p.a = 255;
735    p.r = rsClamp(p.r, (short)0, (short)255);
736    p.g = rsClamp(p.g, (short)0, (short)255);
737    p.b = rsClamp(p.b, (short)0, (short)255);
738
739    return convert_uchar4(p);
740}
741
742static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
743static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
744
745extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
746    float4 color = (float)y * 0.003921569f;
747    float4 fU = ((float)u) - 128.f;
748    float4 fV = ((float)v) - 128.f;
749
750    color += fU * yuv_U_values;
751    color += fV * yuv_V_values;
752    color = clamp(color, 0.f, 1.f);
753    return color;
754}
755
756
757/*
758 * half_RECIP
759 */
760
/* Reciprocal of v, currently computed with a plain full-precision divide. */
extern float __attribute__((overloadable)) half_recip(float v) {
    // FIXME:  actual algorithm for generic approximate reciprocal
    const float one = 1.f;
    return one / v;
}
765
766extern float2 __attribute__((overloadable)) half_recip(float2 v) {
767    float2 r;
768    r.x = half_recip(r.x);
769    r.y = half_recip(r.y);
770    return r;
771}
772
773extern float3 __attribute__((overloadable)) half_recip(float3 v) {
774    float3 r;
775    r.x = half_recip(r.x);
776    r.y = half_recip(r.y);
777    r.z = half_recip(r.z);
778    return r;
779}
780
781extern float4 __attribute__((overloadable)) half_recip(float4 v) {
782    float4 r;
783    r.x = half_recip(r.x);
784    r.y = half_recip(r.y);
785    r.z = half_recip(r.z);
786    r.w = half_recip(r.w);
787    return r;
788}
789
790
791/*
792 * half_SQRT
793 */
794
/* Square root of v; this generic version simply forwards to sqrt(). */
extern float __attribute__((overloadable)) half_sqrt(float v) {
    const float root = sqrt(v);
    return root;
}
798
799extern float2 __attribute__((overloadable)) half_sqrt(float2 v) {
800    float2 r;
801    r.x = half_sqrt(v.x);
802    r.y = half_sqrt(v.y);
803    return r;
804}
805
806extern float3 __attribute__((overloadable)) half_sqrt(float3 v) {
807    float3 r;
808    r.x = half_sqrt(v.x);
809    r.y = half_sqrt(v.y);
810    r.z = half_sqrt(v.z);
811    return r;
812}
813
814extern float4 __attribute__((overloadable)) half_sqrt(float4 v) {
815    float4 r;
816    r.x = half_sqrt(v.x);
817    r.y = half_sqrt(v.y);
818    r.z = half_sqrt(v.z);
819    r.w = half_sqrt(v.w);
820    return r;
821}
822
823
824/*
825 * half_rsqrt
826 */
827
/* Reciprocal square root of v, computed as 1 / sqrt(v). */
extern float __attribute__((overloadable)) half_rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
831
832extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
833    float2 r;
834    r.x = half_rsqrt(v.x);
835    r.y = half_rsqrt(v.y);
836    return r;
837}
838
839extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
840    float3 r;
841    r.x = half_rsqrt(v.x);
842    r.y = half_rsqrt(v.y);
843    r.z = half_rsqrt(v.z);
844    return r;
845}
846
847extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
848    float4 r;
849    r.x = half_rsqrt(v.x);
850    r.y = half_rsqrt(v.y);
851    r.z = half_rsqrt(v.z);
852    r.w = half_rsqrt(v.w);
853    return r;
854}
855
856/**
857 * matrix ops
858 */
859
860extern float4 __attribute__((overloadable))
861rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
862    float4 ret;
863    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
864    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
865    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
866    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
867    return ret;
868}
869
870extern float4 __attribute__((overloadable))
871rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
872    float4 ret;
873    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
874    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
875    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
876    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
877    return ret;
878}
879
880extern float4 __attribute__((overloadable))
881rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
882    float4 ret;
883    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
884    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
885    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
886    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
887    return ret;
888}
889
890extern float3 __attribute__((overloadable))
891rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
892    float3 ret;
893    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
894    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
895    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
896    return ret;
897}
898
899extern float3 __attribute__((overloadable))
900rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
901    float3 ret;
902    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
903    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
904    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
905    return ret;
906}
907
908/**
909 * Pixel Ops
910 */
911extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
912{
913    uchar4 c;
914    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
915    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
916    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
917    c.w = 255;
918    return c;
919}
920
921extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
922{
923    uchar4 c;
924    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
925    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
926    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
927    c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
928    return c;
929}
930
931extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
932{
933    color *= 255.f;
934    color += 0.5f;
935    color = clamp(color, 0.f, 255.f);
936    uchar4 c = {color.x, color.y, color.z, 255};
937    return c;
938}
939
940extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
941{
942    color *= 255.f;
943    color += 0.5f;
944    color = clamp(color, 0.f, 255.f);
945    uchar4 c = {color.x, color.y, color.z, color.w};
946    return c;
947}
948
949