1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rs_core.rsh"
19
20extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
21extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
22extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
23extern float4 __attribute__((overloadable)) convert_float4(uchar4);
24extern float __attribute__((overloadable)) sqrt(float);
25
26/*
27 * CLAMP
28 */
/*
 * _CLAMP(T) expands to the seven clamp() overloads for element type T:
 *   - the scalar form,
 *   - the 2/3/4-wide forms whose bounds are vectors (applied per component),
 *   - the 2/3/4-wide forms that share one scalar low/high for every component.
 * For each component the result is low when the value is below low, otherwise
 * high when the value is above high, otherwise the value itself. The low
 * bound is always tested first.
 */
#define _CLAMP(T) \
extern T __attribute__((overloadable)) clamp(T amount, T low, T high) {             \
    if (amount < low) {                                                             \
        return low;                                                                 \
    }                                                                               \
    return high < amount ? high : amount;                                           \
}                                                                                   \
                                                                                    \
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
    T##2 out;                                                                       \
    out.x = low.x > amount.x ? low.x : (high.x < amount.x ? high.x : amount.x);     \
    out.y = low.y > amount.y ? low.y : (high.y < amount.y ? high.y : amount.y);     \
    return out;                                                                     \
}                                                                                   \
                                                                                    \
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
    T##3 out;                                                                       \
    out.x = low.x > amount.x ? low.x : (high.x < amount.x ? high.x : amount.x);     \
    out.y = low.y > amount.y ? low.y : (high.y < amount.y ? high.y : amount.y);     \
    out.z = low.z > amount.z ? low.z : (high.z < amount.z ? high.z : amount.z);     \
    return out;                                                                     \
}                                                                                   \
                                                                                    \
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
    T##4 out;                                                                       \
    out.x = low.x > amount.x ? low.x : (high.x < amount.x ? high.x : amount.x);     \
    out.y = low.y > amount.y ? low.y : (high.y < amount.y ? high.y : amount.y);     \
    out.z = low.z > amount.z ? low.z : (high.z < amount.z ? high.z : amount.z);     \
    out.w = low.w > amount.w ? low.w : (high.w < amount.w ? high.w : amount.w);     \
    return out;                                                                     \
}                                                                                   \
                                                                                    \
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) {       \
    T##2 out;                                                                       \
    out.x = low > amount.x ? low : (high < amount.x ? high : amount.x);             \
    out.y = low > amount.y ? low : (high < amount.y ? high : amount.y);             \
    return out;                                                                     \
}                                                                                   \
                                                                                    \
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) {       \
    T##3 out;                                                                       \
    out.x = low > amount.x ? low : (high < amount.x ? high : amount.x);             \
    out.y = low > amount.y ? low : (high < amount.y ? high : amount.y);             \
    out.z = low > amount.z ? low : (high < amount.z ? high : amount.z);             \
    return out;                                                                     \
}                                                                                   \
                                                                                    \
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) {       \
    T##4 out;                                                                       \
    out.x = low > amount.x ? low : (high < amount.x ? high : amount.x);             \
    out.y = low > amount.y ? low : (high < amount.y ? high : amount.y);             \
    out.z = low > amount.z ? low : (high < amount.z ? high : amount.z);             \
    out.w = low > amount.w ? low : (high < amount.w ? high : amount.w);             \
    return out;                                                                     \
}
81
82#if !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
83// These functions must be defined here if we are not using the SSE
84// implementation, which includes when we are built as part of the
85// debug runtime (libclcore_debug.bc) or compiling with debug info.
86
87_CLAMP(float);
88
89#else
90
91extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
92extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
93extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
94extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
95extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
96extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
97extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
98
99#endif // !defined(ARCH_X86_HAVE_SSSE3) || defined(RS_DEBUG_RUNTIME) || defined(RS_G_RUNTIME)
100
101_CLAMP(half);
102_CLAMP(double);
103_CLAMP(char);
104_CLAMP(uchar);
105_CLAMP(short);
106_CLAMP(ushort);
107_CLAMP(int);
108_CLAMP(uint);
109_CLAMP(long);
110_CLAMP(ulong);
111
112#undef _CLAMP
113
114/*
115 * FMAX
116 */
117
/*
 * Larger of two floats. v1 is returned only when v1 > v2 holds; in every
 * other case (ties, or a comparison that is false) v2 is returned.
 */
extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
121
122extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
123    float2 r;
124    r.x = v1.x > v2.x ? v1.x : v2.x;
125    r.y = v1.y > v2.y ? v1.y : v2.y;
126    return r;
127}
128
129extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
130    float3 r;
131    r.x = v1.x > v2.x ? v1.x : v2.x;
132    r.y = v1.y > v2.y ? v1.y : v2.y;
133    r.z = v1.z > v2.z ? v1.z : v2.z;
134    return r;
135}
136
137extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
138    float4 r;
139    r.x = v1.x > v2.x ? v1.x : v2.x;
140    r.y = v1.y > v2.y ? v1.y : v2.y;
141    r.z = v1.z > v2.z ? v1.z : v2.z;
142    r.w = v1.w > v2.w ? v1.w : v2.w;
143    return r;
144}
145
146extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
147    float2 r;
148    r.x = v1.x > v2 ? v1.x : v2;
149    r.y = v1.y > v2 ? v1.y : v2;
150    return r;
151}
152
153extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
154    float3 r;
155    r.x = v1.x > v2 ? v1.x : v2;
156    r.y = v1.y > v2 ? v1.y : v2;
157    r.z = v1.z > v2 ? v1.z : v2;
158    return r;
159}
160
161extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
162    float4 r;
163    r.x = v1.x > v2 ? v1.x : v2;
164    r.y = v1.y > v2 ? v1.y : v2;
165    r.z = v1.z > v2 ? v1.z : v2;
166    r.w = v1.w > v2 ? v1.w : v2;
167    return r;
168}
169
/*
 * Smaller of two floats (scalar fmin overload). v1 is returned only when
 * v1 < v2 holds; in every other case (ties, or a comparison that is false)
 * v2 is returned.
 */
extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
173
174
/*
 * FMIN (vector overloads; the scalar fmin(float, float) is defined just
 * above this banner)
 */
178extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
179    float2 r;
180    r.x = v1.x < v2.x ? v1.x : v2.x;
181    r.y = v1.y < v2.y ? v1.y : v2.y;
182    return r;
183}
184
185extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
186    float3 r;
187    r.x = v1.x < v2.x ? v1.x : v2.x;
188    r.y = v1.y < v2.y ? v1.y : v2.y;
189    r.z = v1.z < v2.z ? v1.z : v2.z;
190    return r;
191}
192
193extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
194    float4 r;
195    r.x = v1.x < v2.x ? v1.x : v2.x;
196    r.y = v1.y < v2.y ? v1.y : v2.y;
197    r.z = v1.z < v2.z ? v1.z : v2.z;
198    r.w = v1.w < v2.w ? v1.w : v2.w;
199    return r;
200}
201
202extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
203    float2 r;
204    r.x = v1.x < v2 ? v1.x : v2;
205    r.y = v1.y < v2 ? v1.y : v2;
206    return r;
207}
208
209extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
210    float3 r;
211    r.x = v1.x < v2 ? v1.x : v2;
212    r.y = v1.y < v2 ? v1.y : v2;
213    r.z = v1.z < v2 ? v1.z : v2;
214    return r;
215}
216
217extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
218    float4 r;
219    r.x = v1.x < v2 ? v1.x : v2;
220    r.y = v1.y < v2 ? v1.y : v2;
221    r.z = v1.z < v2 ? v1.z : v2;
222    r.w = v1.w < v2 ? v1.w : v2;
223    return r;
224}
225
226
227/*
228 * MAX
229 */
230
/* Larger of two char values; v2 is returned on a tie. */
extern char __attribute__((overloadable)) max(char v1, char v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
234
235extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
236    char2 r;
237    r.x = v1.x > v2.x ? v1.x : v2.x;
238    r.y = v1.y > v2.y ? v1.y : v2.y;
239    return r;
240}
241
242extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
243    char3 r;
244    r.x = v1.x > v2.x ? v1.x : v2.x;
245    r.y = v1.y > v2.y ? v1.y : v2.y;
246    r.z = v1.z > v2.z ? v1.z : v2.z;
247    return r;
248}
249
250extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
251    char4 r;
252    r.x = v1.x > v2.x ? v1.x : v2.x;
253    r.y = v1.y > v2.y ? v1.y : v2.y;
254    r.z = v1.z > v2.z ? v1.z : v2.z;
255    r.w = v1.w > v2.w ? v1.w : v2.w;
256    return r;
257}
258
/* Larger of two short values; v2 is returned on a tie. */
extern short __attribute__((overloadable)) max(short v1, short v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
262
263extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
264    short2 r;
265    r.x = v1.x > v2.x ? v1.x : v2.x;
266    r.y = v1.y > v2.y ? v1.y : v2.y;
267    return r;
268}
269
270extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
271    short3 r;
272    r.x = v1.x > v2.x ? v1.x : v2.x;
273    r.y = v1.y > v2.y ? v1.y : v2.y;
274    r.z = v1.z > v2.z ? v1.z : v2.z;
275    return r;
276}
277
278extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
279    short4 r;
280    r.x = v1.x > v2.x ? v1.x : v2.x;
281    r.y = v1.y > v2.y ? v1.y : v2.y;
282    r.z = v1.z > v2.z ? v1.z : v2.z;
283    r.w = v1.w > v2.w ? v1.w : v2.w;
284    return r;
285}
286
/* Larger of two int values; v2 is returned on a tie. */
extern int __attribute__((overloadable)) max(int v1, int v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
290
291extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
292    int2 r;
293    r.x = v1.x > v2.x ? v1.x : v2.x;
294    r.y = v1.y > v2.y ? v1.y : v2.y;
295    return r;
296}
297
298extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
299    int3 r;
300    r.x = v1.x > v2.x ? v1.x : v2.x;
301    r.y = v1.y > v2.y ? v1.y : v2.y;
302    r.z = v1.z > v2.z ? v1.z : v2.z;
303    return r;
304}
305
306extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
307    int4 r;
308    r.x = v1.x > v2.x ? v1.x : v2.x;
309    r.y = v1.y > v2.y ? v1.y : v2.y;
310    r.z = v1.z > v2.z ? v1.z : v2.z;
311    r.w = v1.w > v2.w ? v1.w : v2.w;
312    return r;
313}
314
315extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
316    return v1 > v2 ? v1 : v2;
317}
318
319extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
320    uchar2 r;
321    r.x = v1.x > v2.x ? v1.x : v2.x;
322    r.y = v1.y > v2.y ? v1.y : v2.y;
323    return r;
324}
325
326extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
327    uchar3 r;
328    r.x = v1.x > v2.x ? v1.x : v2.x;
329    r.y = v1.y > v2.y ? v1.y : v2.y;
330    r.z = v1.z > v2.z ? v1.z : v2.z;
331    return r;
332}
333
334extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
335    uchar4 r;
336    r.x = v1.x > v2.x ? v1.x : v2.x;
337    r.y = v1.y > v2.y ? v1.y : v2.y;
338    r.z = v1.z > v2.z ? v1.z : v2.z;
339    r.w = v1.w > v2.w ? v1.w : v2.w;
340    return r;
341}
342
343extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
344    return v1 > v2 ? v1 : v2;
345}
346
347extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
348    ushort2 r;
349    r.x = v1.x > v2.x ? v1.x : v2.x;
350    r.y = v1.y > v2.y ? v1.y : v2.y;
351    return r;
352}
353
354extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
355    ushort3 r;
356    r.x = v1.x > v2.x ? v1.x : v2.x;
357    r.y = v1.y > v2.y ? v1.y : v2.y;
358    r.z = v1.z > v2.z ? v1.z : v2.z;
359    return r;
360}
361
362extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
363    ushort4 r;
364    r.x = v1.x > v2.x ? v1.x : v2.x;
365    r.y = v1.y > v2.y ? v1.y : v2.y;
366    r.z = v1.z > v2.z ? v1.z : v2.z;
367    r.w = v1.w > v2.w ? v1.w : v2.w;
368    return r;
369}
370
371extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
372    return v1 > v2 ? v1 : v2;
373}
374
375extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
376    uint2 r;
377    r.x = v1.x > v2.x ? v1.x : v2.x;
378    r.y = v1.y > v2.y ? v1.y : v2.y;
379    return r;
380}
381
382extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
383    uint3 r;
384    r.x = v1.x > v2.x ? v1.x : v2.x;
385    r.y = v1.y > v2.y ? v1.y : v2.y;
386    r.z = v1.z > v2.z ? v1.z : v2.z;
387    return r;
388}
389
390extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
391    uint4 r;
392    r.x = v1.x > v2.x ? v1.x : v2.x;
393    r.y = v1.y > v2.y ? v1.y : v2.y;
394    r.z = v1.z > v2.z ? v1.z : v2.z;
395    r.w = v1.w > v2.w ? v1.w : v2.w;
396    return r;
397}
398
/* max() on floats forwards to the matching fmax() overload. */
extern float __attribute__((overloadable)) max(float v1, float v2) {
    const float larger = fmax(v1, v2);
    return larger;
}
402
403extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
404    return fmax(v1, v2);
405}
406
407extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
408    return fmax(v1, v2);
409}
410
411extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
412    return fmax(v1, v2);
413}
414
415extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
416    return fmax(v1, v2);
417}
418
419extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
420    return fmax(v1, v2);
421}
422
423extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
424    return fmax(v1, v2);
425}
426
427
428/*
429 * MIN
430 */
431
/*
 * Smaller of two int8_t values; v2 is returned on a tie.
 * NOTE(review): spelled int8_t here while the corresponding max() overload
 * is spelled char -- presumably the same type via typedef; confirm.
 */
extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
435
436extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
437    char2 r;
438    r.x = v1.x < v2.x ? v1.x : v2.x;
439    r.y = v1.y < v2.y ? v1.y : v2.y;
440    return r;
441}
442
443extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
444    char3 r;
445    r.x = v1.x < v2.x ? v1.x : v2.x;
446    r.y = v1.y < v2.y ? v1.y : v2.y;
447    r.z = v1.z < v2.z ? v1.z : v2.z;
448    return r;
449}
450
451extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
452    char4 r;
453    r.x = v1.x < v2.x ? v1.x : v2.x;
454    r.y = v1.y < v2.y ? v1.y : v2.y;
455    r.z = v1.z < v2.z ? v1.z : v2.z;
456    r.w = v1.w < v2.w ? v1.w : v2.w;
457    return r;
458}
459
/*
 * Smaller of two int16_t values; v2 is returned on a tie.
 * NOTE(review): spelled int16_t here while the corresponding max() overload
 * is spelled short -- presumably the same type via typedef; confirm.
 */
extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
463
464extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
465    short2 r;
466    r.x = v1.x < v2.x ? v1.x : v2.x;
467    r.y = v1.y < v2.y ? v1.y : v2.y;
468    return r;
469}
470
471extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
472    short3 r;
473    r.x = v1.x < v2.x ? v1.x : v2.x;
474    r.y = v1.y < v2.y ? v1.y : v2.y;
475    r.z = v1.z < v2.z ? v1.z : v2.z;
476    return r;
477}
478
479extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
480    short4 r;
481    r.x = v1.x < v2.x ? v1.x : v2.x;
482    r.y = v1.y < v2.y ? v1.y : v2.y;
483    r.z = v1.z < v2.z ? v1.z : v2.z;
484    r.w = v1.w < v2.w ? v1.w : v2.w;
485    return r;
486}
487
/*
 * Smaller of two int32_t values; v2 is returned on a tie.
 * NOTE(review): spelled int32_t here while the corresponding max() overload
 * is spelled int -- presumably the same type via typedef; confirm.
 */
extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
491
492extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
493    int2 r;
494    r.x = v1.x < v2.x ? v1.x : v2.x;
495    r.y = v1.y < v2.y ? v1.y : v2.y;
496    return r;
497}
498
499extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
500    int3 r;
501    r.x = v1.x < v2.x ? v1.x : v2.x;
502    r.y = v1.y < v2.y ? v1.y : v2.y;
503    r.z = v1.z < v2.z ? v1.z : v2.z;
504    return r;
505}
506
507extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
508    int4 r;
509    r.x = v1.x < v2.x ? v1.x : v2.x;
510    r.y = v1.y < v2.y ? v1.y : v2.y;
511    r.z = v1.z < v2.z ? v1.z : v2.z;
512    r.w = v1.w < v2.w ? v1.w : v2.w;
513    return r;
514}
515
516extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
517    return v1 < v2 ? v1 : v2;
518}
519
520extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
521    uchar2 r;
522    r.x = v1.x < v2.x ? v1.x : v2.x;
523    r.y = v1.y < v2.y ? v1.y : v2.y;
524    return r;
525}
526
527extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
528    uchar3 r;
529    r.x = v1.x < v2.x ? v1.x : v2.x;
530    r.y = v1.y < v2.y ? v1.y : v2.y;
531    r.z = v1.z < v2.z ? v1.z : v2.z;
532    return r;
533}
534
535extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
536    uchar4 r;
537    r.x = v1.x < v2.x ? v1.x : v2.x;
538    r.y = v1.y < v2.y ? v1.y : v2.y;
539    r.z = v1.z < v2.z ? v1.z : v2.z;
540    r.w = v1.w < v2.w ? v1.w : v2.w;
541    return r;
542}
543
544extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
545    return v1 < v2 ? v1 : v2;
546}
547
548extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
549    ushort2 r;
550    r.x = v1.x < v2.x ? v1.x : v2.x;
551    r.y = v1.y < v2.y ? v1.y : v2.y;
552    return r;
553}
554
555extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
556    ushort3 r;
557    r.x = v1.x < v2.x ? v1.x : v2.x;
558    r.y = v1.y < v2.y ? v1.y : v2.y;
559    r.z = v1.z < v2.z ? v1.z : v2.z;
560    return r;
561}
562
563extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
564    ushort4 r;
565    r.x = v1.x < v2.x ? v1.x : v2.x;
566    r.y = v1.y < v2.y ? v1.y : v2.y;
567    r.z = v1.z < v2.z ? v1.z : v2.z;
568    r.w = v1.w < v2.w ? v1.w : v2.w;
569    return r;
570}
571
572extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
573    return v1 < v2 ? v1 : v2;
574}
575
576extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
577    uint2 r;
578    r.x = v1.x < v2.x ? v1.x : v2.x;
579    r.y = v1.y < v2.y ? v1.y : v2.y;
580    return r;
581}
582
583extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
584    uint3 r;
585    r.x = v1.x < v2.x ? v1.x : v2.x;
586    r.y = v1.y < v2.y ? v1.y : v2.y;
587    r.z = v1.z < v2.z ? v1.z : v2.z;
588    return r;
589}
590
591extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
592    uint4 r;
593    r.x = v1.x < v2.x ? v1.x : v2.x;
594    r.y = v1.y < v2.y ? v1.y : v2.y;
595    r.z = v1.z < v2.z ? v1.z : v2.z;
596    r.w = v1.w < v2.w ? v1.w : v2.w;
597    return r;
598}
599
/* min() on floats forwards to the matching fmin() overload. */
extern float __attribute__((overloadable)) min(float v1, float v2) {
    const float smaller = fmin(v1, v2);
    return smaller;
}
603
604extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
605    return fmin(v1, v2);
606}
607
608extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
609    return fmin(v1, v2);
610}
611
612extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
613    return fmin(v1, v2);
614}
615
616extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
617    return fmin(v1, v2);
618}
619
620extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
621    return fmin(v1, v2);
622}
623
624extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
625    return fmin(v1, v2);
626}
627
628/*
629 * YUV
630 */
631
632extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
633    short Y = ((short)y) - 16;
634    short U = ((short)u) - 128;
635    short V = ((short)v) - 128;
636
637    short4 p;
638    p.r = (Y * 298 + V * 409 + 128) >> 8;
639    p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
640    p.b = (Y * 298 + U * 516 + 128) >> 8;
641    p.a = 255;
642    p.r = rsClamp(p.r, (short)0, (short)255);
643    p.g = rsClamp(p.g, (short)0, (short)255);
644    p.b = rsClamp(p.b, (short)0, (short)255);
645
646    return convert_uchar4(p);
647}
648
649/*
 * half_recip
651 */
652
653extern float2 __attribute__((overloadable)) half_recip(float2 v) {
654    return ((float2) 1.f) / v;
655}
656
657extern float3 __attribute__((overloadable)) half_recip(float3 v) {
658    return ((float3) 1.f) / v;
659}
660
661extern float4 __attribute__((overloadable)) half_recip(float4 v) {
662    return ((float4) 1.f) / v;
663}
664
665
666
667/*
668 * half_rsqrt
669 */
670
/* Reciprocal square root of v, computed as 1 divided by sqrt(v). */
extern float __attribute__((overloadable)) half_rsqrt(float v) {
    float inverse = 1.f;
    inverse /= sqrt(v);
    return inverse;
}
674
675extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
676    float2 r;
677    r.x = half_rsqrt(v.x);
678    r.y = half_rsqrt(v.y);
679    return r;
680}
681
682extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
683    float3 r;
684    r.x = half_rsqrt(v.x);
685    r.y = half_rsqrt(v.y);
686    r.z = half_rsqrt(v.z);
687    return r;
688}
689
690extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
691    float4 r;
692    r.x = half_rsqrt(v.x);
693    r.y = half_rsqrt(v.y);
694    r.z = half_rsqrt(v.z);
695    r.w = half_rsqrt(v.w);
696    return r;
697}
698
699/**
700 * matrix ops
701 */
702
703extern float4 __attribute__((overloadable))
704rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
705    float4 ret;
706    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
707    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
708    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
709    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
710    return ret;
711}
712
713extern float4 __attribute__((overloadable))
714rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
715    float4 ret;
716    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
717    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
718    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
719    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
720    return ret;
721}
722
723extern float4 __attribute__((overloadable))
724rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
725    float4 ret;
726    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
727    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
728    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
729    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
730    return ret;
731}
732
733extern float3 __attribute__((overloadable))
734rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
735    float3 ret;
736    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
737    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
738    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
739    return ret;
740}
741
742extern float3 __attribute__((overloadable))
743rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
744    float3 ret;
745    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
746    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
747    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
748    return ret;
749}
750
751/**
752 * Pixel Ops
753 */
754extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
755{
756    uchar4 c;
757    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
758    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
759    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
760    c.w = 255;
761    return c;
762}
763
764extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
765{
766    uchar4 c;
767    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
768    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
769    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
770    c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
771    return c;
772}
773
774extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
775{
776    color *= 255.f;
777    color += 0.5f;
778    color = clamp(color, 0.f, 255.f);
779    uchar4 c = {color.x, color.y, color.z, 255};
780    return c;
781}
782
783extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
784{
785    color *= 255.f;
786    color += 0.5f;
787    color = clamp(color, 0.f, 255.f);
788    uchar4 c = {color.x, color.y, color.z, color.w};
789    return c;
790}
791