generic.c revision f7f9316f4cf373733285f947bea876ead7216715
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rs_core.rsh"
19
20extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
21extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
22extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
23extern float4 __attribute__((overloadable)) convert_float4(uchar4);
24extern float __attribute__((overloadable)) sqrt(float);
25
26/*
27 * CLAMP
28 */
/*
 * Generates the clamp() overload set for element type T:
 *   - scalar T,
 *   - T2/T3/T4 with per-component low/high vectors,
 *   - T2/T3/T4 with one scalar low/high applied to every component.
 * The low bound is tested before the high bound, so an amount below low
 * yields low even when low > high. Each vector overload forwards its
 * components to the scalar overload generated at the top of the macro.
 */
#define _CLAMP(T) \
extern T __attribute__((overloadable)) clamp(T amount, T low, T high) { \
    if (amount < low) { \
        return low; \
    } \
    return amount > high ? high : amount; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
    T##2 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
    T##3 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    out.z = clamp(amount.z, low.z, high.z); \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
    T##4 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    out.z = clamp(amount.z, low.z, high.z); \
    out.w = clamp(amount.w, low.w, high.w); \
    return out; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) { \
    T##2 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) { \
    T##3 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    out.z = clamp(amount.z, low, high); \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) { \
    T##4 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    out.z = clamp(amount.z, low, high); \
    out.w = clamp(amount.w, low, high); \
    return out; \
}
81
#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
// The float clamp() overloads are generated here whenever the SSE
// implementation is not in use. That includes builds that are part of the
// debug runtime (libclcore_debug.bc) and builds compiled with debug info.

_CLAMP(float);

#else

// In this branch the float clamp() overloads are only declared; their
// definitions are expected to be supplied by the SSE implementation.
extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);

#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
100
// Generate the scalar and 2/3/4-component clamp() overloads for each of the
// remaining element types. Unlike float, these are always defined here.
_CLAMP(double);
_CLAMP(char);
_CLAMP(uchar);
_CLAMP(short);
_CLAMP(ushort);
_CLAMP(int);
_CLAMP(uint);
_CLAMP(long);
_CLAMP(ulong);

// The generator macro is not used past this point in the visible code.
#undef _CLAMP
112
113/*
114 * FMAX
115 */
116
/*
 * Returns the larger of two floats. When the comparison v1 > v2 is false
 * (including ties), v2 is returned.
 */
extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
120
121extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
122    float2 r;
123    r.x = v1.x > v2.x ? v1.x : v2.x;
124    r.y = v1.y > v2.y ? v1.y : v2.y;
125    return r;
126}
127
128extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
129    float3 r;
130    r.x = v1.x > v2.x ? v1.x : v2.x;
131    r.y = v1.y > v2.y ? v1.y : v2.y;
132    r.z = v1.z > v2.z ? v1.z : v2.z;
133    return r;
134}
135
136extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
137    float4 r;
138    r.x = v1.x > v2.x ? v1.x : v2.x;
139    r.y = v1.y > v2.y ? v1.y : v2.y;
140    r.z = v1.z > v2.z ? v1.z : v2.z;
141    r.w = v1.w > v2.w ? v1.w : v2.w;
142    return r;
143}
144
145extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
146    float2 r;
147    r.x = v1.x > v2 ? v1.x : v2;
148    r.y = v1.y > v2 ? v1.y : v2;
149    return r;
150}
151
152extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
153    float3 r;
154    r.x = v1.x > v2 ? v1.x : v2;
155    r.y = v1.y > v2 ? v1.y : v2;
156    r.z = v1.z > v2 ? v1.z : v2;
157    return r;
158}
159
160extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
161    float4 r;
162    r.x = v1.x > v2 ? v1.x : v2;
163    r.y = v1.y > v2 ? v1.y : v2;
164    r.z = v1.z > v2 ? v1.z : v2;
165    r.w = v1.w > v2 ? v1.w : v2;
166    return r;
167}
168
/*
 * Returns the smaller of two floats. When the comparison v1 < v2 is false
 * (including ties), v2 is returned.
 */
extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
172
173
/*
 * FMIN (vector overloads; the scalar fmin(float, float) is defined just above)
 */
177extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
178    float2 r;
179    r.x = v1.x < v2.x ? v1.x : v2.x;
180    r.y = v1.y < v2.y ? v1.y : v2.y;
181    return r;
182}
183
184extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
185    float3 r;
186    r.x = v1.x < v2.x ? v1.x : v2.x;
187    r.y = v1.y < v2.y ? v1.y : v2.y;
188    r.z = v1.z < v2.z ? v1.z : v2.z;
189    return r;
190}
191
192extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
193    float4 r;
194    r.x = v1.x < v2.x ? v1.x : v2.x;
195    r.y = v1.y < v2.y ? v1.y : v2.y;
196    r.z = v1.z < v2.z ? v1.z : v2.z;
197    r.w = v1.w < v2.w ? v1.w : v2.w;
198    return r;
199}
200
201extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
202    float2 r;
203    r.x = v1.x < v2 ? v1.x : v2;
204    r.y = v1.y < v2 ? v1.y : v2;
205    return r;
206}
207
208extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
209    float3 r;
210    r.x = v1.x < v2 ? v1.x : v2;
211    r.y = v1.y < v2 ? v1.y : v2;
212    r.z = v1.z < v2 ? v1.z : v2;
213    return r;
214}
215
216extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
217    float4 r;
218    r.x = v1.x < v2 ? v1.x : v2;
219    r.y = v1.y < v2 ? v1.y : v2;
220    r.z = v1.z < v2 ? v1.z : v2;
221    r.w = v1.w < v2 ? v1.w : v2;
222    return r;
223}
224
225
226/*
227 * MAX
228 */
229
/* Returns the larger of two char values; v2 is returned on a tie. */
extern char __attribute__((overloadable)) max(char v1, char v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
233
234extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
235    char2 r;
236    r.x = v1.x > v2.x ? v1.x : v2.x;
237    r.y = v1.y > v2.y ? v1.y : v2.y;
238    return r;
239}
240
241extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
242    char3 r;
243    r.x = v1.x > v2.x ? v1.x : v2.x;
244    r.y = v1.y > v2.y ? v1.y : v2.y;
245    r.z = v1.z > v2.z ? v1.z : v2.z;
246    return r;
247}
248
249extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
250    char4 r;
251    r.x = v1.x > v2.x ? v1.x : v2.x;
252    r.y = v1.y > v2.y ? v1.y : v2.y;
253    r.z = v1.z > v2.z ? v1.z : v2.z;
254    r.w = v1.w > v2.w ? v1.w : v2.w;
255    return r;
256}
257
/* Returns the larger of two short values; v2 is returned on a tie. */
extern short __attribute__((overloadable)) max(short v1, short v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
261
262extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
263    short2 r;
264    r.x = v1.x > v2.x ? v1.x : v2.x;
265    r.y = v1.y > v2.y ? v1.y : v2.y;
266    return r;
267}
268
269extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
270    short3 r;
271    r.x = v1.x > v2.x ? v1.x : v2.x;
272    r.y = v1.y > v2.y ? v1.y : v2.y;
273    r.z = v1.z > v2.z ? v1.z : v2.z;
274    return r;
275}
276
277extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
278    short4 r;
279    r.x = v1.x > v2.x ? v1.x : v2.x;
280    r.y = v1.y > v2.y ? v1.y : v2.y;
281    r.z = v1.z > v2.z ? v1.z : v2.z;
282    r.w = v1.w > v2.w ? v1.w : v2.w;
283    return r;
284}
285
/* Returns the larger of two int values; v2 is returned on a tie. */
extern int __attribute__((overloadable)) max(int v1, int v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
289
290extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
291    int2 r;
292    r.x = v1.x > v2.x ? v1.x : v2.x;
293    r.y = v1.y > v2.y ? v1.y : v2.y;
294    return r;
295}
296
297extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
298    int3 r;
299    r.x = v1.x > v2.x ? v1.x : v2.x;
300    r.y = v1.y > v2.y ? v1.y : v2.y;
301    r.z = v1.z > v2.z ? v1.z : v2.z;
302    return r;
303}
304
305extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
306    int4 r;
307    r.x = v1.x > v2.x ? v1.x : v2.x;
308    r.y = v1.y > v2.y ? v1.y : v2.y;
309    r.z = v1.z > v2.z ? v1.z : v2.z;
310    r.w = v1.w > v2.w ? v1.w : v2.w;
311    return r;
312}
313
314extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
315    return v1 > v2 ? v1 : v2;
316}
317
318extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
319    uchar2 r;
320    r.x = v1.x > v2.x ? v1.x : v2.x;
321    r.y = v1.y > v2.y ? v1.y : v2.y;
322    return r;
323}
324
325extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
326    uchar3 r;
327    r.x = v1.x > v2.x ? v1.x : v2.x;
328    r.y = v1.y > v2.y ? v1.y : v2.y;
329    r.z = v1.z > v2.z ? v1.z : v2.z;
330    return r;
331}
332
333extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
334    uchar4 r;
335    r.x = v1.x > v2.x ? v1.x : v2.x;
336    r.y = v1.y > v2.y ? v1.y : v2.y;
337    r.z = v1.z > v2.z ? v1.z : v2.z;
338    r.w = v1.w > v2.w ? v1.w : v2.w;
339    return r;
340}
341
342extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
343    return v1 > v2 ? v1 : v2;
344}
345
346extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
347    ushort2 r;
348    r.x = v1.x > v2.x ? v1.x : v2.x;
349    r.y = v1.y > v2.y ? v1.y : v2.y;
350    return r;
351}
352
353extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
354    ushort3 r;
355    r.x = v1.x > v2.x ? v1.x : v2.x;
356    r.y = v1.y > v2.y ? v1.y : v2.y;
357    r.z = v1.z > v2.z ? v1.z : v2.z;
358    return r;
359}
360
361extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
362    ushort4 r;
363    r.x = v1.x > v2.x ? v1.x : v2.x;
364    r.y = v1.y > v2.y ? v1.y : v2.y;
365    r.z = v1.z > v2.z ? v1.z : v2.z;
366    r.w = v1.w > v2.w ? v1.w : v2.w;
367    return r;
368}
369
370extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
371    return v1 > v2 ? v1 : v2;
372}
373
374extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
375    uint2 r;
376    r.x = v1.x > v2.x ? v1.x : v2.x;
377    r.y = v1.y > v2.y ? v1.y : v2.y;
378    return r;
379}
380
381extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
382    uint3 r;
383    r.x = v1.x > v2.x ? v1.x : v2.x;
384    r.y = v1.y > v2.y ? v1.y : v2.y;
385    r.z = v1.z > v2.z ? v1.z : v2.z;
386    return r;
387}
388
389extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
390    uint4 r;
391    r.x = v1.x > v2.x ? v1.x : v2.x;
392    r.y = v1.y > v2.y ? v1.y : v2.y;
393    r.z = v1.z > v2.z ? v1.z : v2.z;
394    r.w = v1.w > v2.w ? v1.w : v2.w;
395    return r;
396}
397
398extern float __attribute__((overloadable)) max(float v1, float v2) {
399    return fmax(v1, v2);
400}
401
402extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
403    return fmax(v1, v2);
404}
405
406extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
407    return fmax(v1, v2);
408}
409
410extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
411    return fmax(v1, v2);
412}
413
414extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
415    return fmax(v1, v2);
416}
417
418extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
419    return fmax(v1, v2);
420}
421
422extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
423    return fmax(v1, v2);
424}
425
426
427/*
428 * MIN
429 */
430
/* Returns the smaller of two int8_t values; v2 is returned on a tie. */
extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
434
435extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
436    char2 r;
437    r.x = v1.x < v2.x ? v1.x : v2.x;
438    r.y = v1.y < v2.y ? v1.y : v2.y;
439    return r;
440}
441
442extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
443    char3 r;
444    r.x = v1.x < v2.x ? v1.x : v2.x;
445    r.y = v1.y < v2.y ? v1.y : v2.y;
446    r.z = v1.z < v2.z ? v1.z : v2.z;
447    return r;
448}
449
450extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
451    char4 r;
452    r.x = v1.x < v2.x ? v1.x : v2.x;
453    r.y = v1.y < v2.y ? v1.y : v2.y;
454    r.z = v1.z < v2.z ? v1.z : v2.z;
455    r.w = v1.w < v2.w ? v1.w : v2.w;
456    return r;
457}
458
/* Returns the smaller of two int16_t values; v2 is returned on a tie. */
extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
462
463extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
464    short2 r;
465    r.x = v1.x < v2.x ? v1.x : v2.x;
466    r.y = v1.y < v2.y ? v1.y : v2.y;
467    return r;
468}
469
470extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
471    short3 r;
472    r.x = v1.x < v2.x ? v1.x : v2.x;
473    r.y = v1.y < v2.y ? v1.y : v2.y;
474    r.z = v1.z < v2.z ? v1.z : v2.z;
475    return r;
476}
477
478extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
479    short4 r;
480    r.x = v1.x < v2.x ? v1.x : v2.x;
481    r.y = v1.y < v2.y ? v1.y : v2.y;
482    r.z = v1.z < v2.z ? v1.z : v2.z;
483    r.w = v1.w < v2.w ? v1.w : v2.w;
484    return r;
485}
486
/* Returns the smaller of two int32_t values; v2 is returned on a tie. */
extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
490
491extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
492    int2 r;
493    r.x = v1.x < v2.x ? v1.x : v2.x;
494    r.y = v1.y < v2.y ? v1.y : v2.y;
495    return r;
496}
497
498extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
499    int3 r;
500    r.x = v1.x < v2.x ? v1.x : v2.x;
501    r.y = v1.y < v2.y ? v1.y : v2.y;
502    r.z = v1.z < v2.z ? v1.z : v2.z;
503    return r;
504}
505
506extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
507    int4 r;
508    r.x = v1.x < v2.x ? v1.x : v2.x;
509    r.y = v1.y < v2.y ? v1.y : v2.y;
510    r.z = v1.z < v2.z ? v1.z : v2.z;
511    r.w = v1.w < v2.w ? v1.w : v2.w;
512    return r;
513}
514
515extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
516    return v1 < v2 ? v1 : v2;
517}
518
519extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
520    uchar2 r;
521    r.x = v1.x < v2.x ? v1.x : v2.x;
522    r.y = v1.y < v2.y ? v1.y : v2.y;
523    return r;
524}
525
526extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
527    uchar3 r;
528    r.x = v1.x < v2.x ? v1.x : v2.x;
529    r.y = v1.y < v2.y ? v1.y : v2.y;
530    r.z = v1.z < v2.z ? v1.z : v2.z;
531    return r;
532}
533
534extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
535    uchar4 r;
536    r.x = v1.x < v2.x ? v1.x : v2.x;
537    r.y = v1.y < v2.y ? v1.y : v2.y;
538    r.z = v1.z < v2.z ? v1.z : v2.z;
539    r.w = v1.w < v2.w ? v1.w : v2.w;
540    return r;
541}
542
543extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
544    return v1 < v2 ? v1 : v2;
545}
546
547extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
548    ushort2 r;
549    r.x = v1.x < v2.x ? v1.x : v2.x;
550    r.y = v1.y < v2.y ? v1.y : v2.y;
551    return r;
552}
553
554extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
555    ushort3 r;
556    r.x = v1.x < v2.x ? v1.x : v2.x;
557    r.y = v1.y < v2.y ? v1.y : v2.y;
558    r.z = v1.z < v2.z ? v1.z : v2.z;
559    return r;
560}
561
562extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
563    ushort4 r;
564    r.x = v1.x < v2.x ? v1.x : v2.x;
565    r.y = v1.y < v2.y ? v1.y : v2.y;
566    r.z = v1.z < v2.z ? v1.z : v2.z;
567    r.w = v1.w < v2.w ? v1.w : v2.w;
568    return r;
569}
570
571extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
572    return v1 < v2 ? v1 : v2;
573}
574
575extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
576    uint2 r;
577    r.x = v1.x < v2.x ? v1.x : v2.x;
578    r.y = v1.y < v2.y ? v1.y : v2.y;
579    return r;
580}
581
582extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
583    uint3 r;
584    r.x = v1.x < v2.x ? v1.x : v2.x;
585    r.y = v1.y < v2.y ? v1.y : v2.y;
586    r.z = v1.z < v2.z ? v1.z : v2.z;
587    return r;
588}
589
590extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
591    uint4 r;
592    r.x = v1.x < v2.x ? v1.x : v2.x;
593    r.y = v1.y < v2.y ? v1.y : v2.y;
594    r.z = v1.z < v2.z ? v1.z : v2.z;
595    r.w = v1.w < v2.w ? v1.w : v2.w;
596    return r;
597}
598
599extern float __attribute__((overloadable)) min(float v1, float v2) {
600    return fmin(v1, v2);
601}
602
603extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
604    return fmin(v1, v2);
605}
606
607extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
608    return fmin(v1, v2);
609}
610
611extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
612    return fmin(v1, v2);
613}
614
615extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
616    return fmin(v1, v2);
617}
618
619extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
620    return fmin(v1, v2);
621}
622
623extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
624    return fmin(v1, v2);
625}
626
627/*
628 * YUV
629 */
630
631extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
632    short Y = ((short)y) - 16;
633    short U = ((short)u) - 128;
634    short V = ((short)v) - 128;
635
636    short4 p;
637    p.r = (Y * 298 + V * 409 + 128) >> 8;
638    p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
639    p.b = (Y * 298 + U * 516 + 128) >> 8;
640    p.a = 255;
641    p.r = rsClamp(p.r, (short)0, (short)255);
642    p.g = rsClamp(p.g, (short)0, (short)255);
643    p.b = rsClamp(p.b, (short)0, (short)255);
644
645    return convert_uchar4(p);
646}
647
648/*
649 * half_RECIP
650 */
651
652extern float2 __attribute__((overloadable)) half_recip(float2 v) {
653    return ((float2) 1.f) / v;
654}
655
656extern float3 __attribute__((overloadable)) half_recip(float3 v) {
657    return ((float3) 1.f) / v;
658}
659
660extern float4 __attribute__((overloadable)) half_recip(float4 v) {
661    return ((float4) 1.f) / v;
662}
663
664
665
666/*
667 * half_rsqrt
668 */
669
/* Returns the reciprocal square root of v, computed as 1 / sqrt(v). */
extern float __attribute__((overloadable)) half_rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
673
674extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
675    float2 r;
676    r.x = half_rsqrt(v.x);
677    r.y = half_rsqrt(v.y);
678    return r;
679}
680
681extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
682    float3 r;
683    r.x = half_rsqrt(v.x);
684    r.y = half_rsqrt(v.y);
685    r.z = half_rsqrt(v.z);
686    return r;
687}
688
689extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
690    float4 r;
691    r.x = half_rsqrt(v.x);
692    r.y = half_rsqrt(v.y);
693    r.z = half_rsqrt(v.z);
694    r.w = half_rsqrt(v.w);
695    return r;
696}
697
698/**
699 * matrix ops
700 */
701
702extern float4 __attribute__((overloadable))
703rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
704    float4 ret;
705    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
706    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
707    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
708    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
709    return ret;
710}
711
712extern float4 __attribute__((overloadable))
713rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
714    float4 ret;
715    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
716    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
717    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
718    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
719    return ret;
720}
721
722extern float4 __attribute__((overloadable))
723rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
724    float4 ret;
725    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
726    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
727    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
728    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
729    return ret;
730}
731
732extern float3 __attribute__((overloadable))
733rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
734    float3 ret;
735    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
736    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
737    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
738    return ret;
739}
740
741extern float3 __attribute__((overloadable))
742rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
743    float3 ret;
744    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
745    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
746    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
747    return ret;
748}
749
750/**
751 * Pixel Ops
752 */
753extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
754{
755    uchar4 c;
756    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
757    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
758    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
759    c.w = 255;
760    return c;
761}
762
763extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
764{
765    uchar4 c;
766    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
767    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
768    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
769    c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
770    return c;
771}
772
773extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
774{
775    color *= 255.f;
776    color += 0.5f;
777    color = clamp(color, 0.f, 255.f);
778    uchar4 c = {color.x, color.y, color.z, 255};
779    return c;
780}
781
782extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
783{
784    color *= 255.f;
785    color += 0.5f;
786    color = clamp(color, 0.f, 255.f);
787    uchar4 c = {color.x, color.y, color.z, color.w};
788    return c;
789}
790