1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rs_types.rsh"
19
/*
 * Forward declarations of overloads that are referenced here but not defined
 * in this part of the file.
 */
/* Scalar short clamp; used by rsYuvToRGBA_uchar4. */
extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
/* Conversions to and from uchar4 (going by their names and signatures). */
extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
extern float4 __attribute__((overloadable)) convert_float4(uchar4);
/* float overload of sqrt; used by half_rsqrt. */
extern float __attribute__((overloadable)) sqrt(float);
25
26/*
27 * CLAMP
28 */
/*
 * _CLAMP(T) defines seven clamp() overloads for element type T:
 *   - scalar:                        clamp(T, T, T)
 *   - per-lane bounds:               clamp(Tn, Tn, Tn) for n = 2, 3, 4
 *   - one bound pair for all lanes:  clamp(Tn, T, T)   for n = 2, 3, 4
 * Each lane yields low when amount < low, otherwise high when amount > high,
 * otherwise amount itself. The low test is made first, so it takes priority
 * when low > high.
 */
#define _CLAMP(T) \
extern T __attribute__((overloadable)) clamp(T amount, T low, T high) { \
    if (amount < low) { \
        return low; \
    } \
    if (amount > high) { \
        return high; \
    } \
    return amount; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
    T##2 out = amount; \
    if (amount.x < low.x) { out.x = low.x; } else if (amount.x > high.x) { out.x = high.x; } \
    if (amount.y < low.y) { out.y = low.y; } else if (amount.y > high.y) { out.y = high.y; } \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
    T##3 out = amount; \
    if (amount.x < low.x) { out.x = low.x; } else if (amount.x > high.x) { out.x = high.x; } \
    if (amount.y < low.y) { out.y = low.y; } else if (amount.y > high.y) { out.y = high.y; } \
    if (amount.z < low.z) { out.z = low.z; } else if (amount.z > high.z) { out.z = high.z; } \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
    T##4 out = amount; \
    if (amount.x < low.x) { out.x = low.x; } else if (amount.x > high.x) { out.x = high.x; } \
    if (amount.y < low.y) { out.y = low.y; } else if (amount.y > high.y) { out.y = high.y; } \
    if (amount.z < low.z) { out.z = low.z; } else if (amount.z > high.z) { out.z = high.z; } \
    if (amount.w < low.w) { out.w = low.w; } else if (amount.w > high.w) { out.w = high.w; } \
    return out; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) { \
    T##2 out = amount; \
    if (amount.x < low) { out.x = low; } else if (amount.x > high) { out.x = high; } \
    if (amount.y < low) { out.y = low; } else if (amount.y > high) { out.y = high; } \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) { \
    T##3 out = amount; \
    if (amount.x < low) { out.x = low; } else if (amount.x > high) { out.x = high; } \
    if (amount.y < low) { out.y = low; } else if (amount.y > high) { out.y = high; } \
    if (amount.z < low) { out.z = low; } else if (amount.z > high) { out.z = high; } \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) { \
    T##4 out = amount; \
    if (amount.x < low) { out.x = low; } else if (amount.x > high) { out.x = high; } \
    if (amount.y < low) { out.y = low; } else if (amount.y > high) { out.y = high; } \
    if (amount.z < low) { out.z = low; } else if (amount.z > high) { out.z = high; } \
    if (amount.w < low) { out.w = low; } else if (amount.w > high) { out.w = high; } \
    return out; \
}
81
82#if !defined(__i386__) && !defined(__x86_64__)
83
84_CLAMP(float);
85
86#else
87
88extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
89extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
90extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
91extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
92extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
93extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
94extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);
95
96#endif // !defined(__i386__) && !defined(__x86_64__)
97
98_CLAMP(double);
99_CLAMP(char);
100_CLAMP(uchar);
101_CLAMP(short);
102_CLAMP(ushort);
103_CLAMP(int);
104_CLAMP(uint);
105_CLAMP(long);
106_CLAMP(ulong);
107
108#undef _CLAMP
109
110/*
111 * FMAX
112 */
113
/* Returns v1 when v1 > v2 holds; otherwise returns v2. */
extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
117
118extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
119    float2 r;
120    r.x = v1.x > v2.x ? v1.x : v2.x;
121    r.y = v1.y > v2.y ? v1.y : v2.y;
122    return r;
123}
124
125extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
126    float3 r;
127    r.x = v1.x > v2.x ? v1.x : v2.x;
128    r.y = v1.y > v2.y ? v1.y : v2.y;
129    r.z = v1.z > v2.z ? v1.z : v2.z;
130    return r;
131}
132
133extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
134    float4 r;
135    r.x = v1.x > v2.x ? v1.x : v2.x;
136    r.y = v1.y > v2.y ? v1.y : v2.y;
137    r.z = v1.z > v2.z ? v1.z : v2.z;
138    r.w = v1.w > v2.w ? v1.w : v2.w;
139    return r;
140}
141
142extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
143    float2 r;
144    r.x = v1.x > v2 ? v1.x : v2;
145    r.y = v1.y > v2 ? v1.y : v2;
146    return r;
147}
148
149extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
150    float3 r;
151    r.x = v1.x > v2 ? v1.x : v2;
152    r.y = v1.y > v2 ? v1.y : v2;
153    r.z = v1.z > v2 ? v1.z : v2;
154    return r;
155}
156
157extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
158    float4 r;
159    r.x = v1.x > v2 ? v1.x : v2;
160    r.y = v1.y > v2 ? v1.y : v2;
161    r.z = v1.z > v2 ? v1.z : v2;
162    r.w = v1.w > v2 ? v1.w : v2;
163    return r;
164}
165
/* Returns v1 when v1 < v2 holds; otherwise returns v2. */
extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
169
170
171/*
172 * FMIN
173 */
174extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
175    float2 r;
176    r.x = v1.x < v2.x ? v1.x : v2.x;
177    r.y = v1.y < v2.y ? v1.y : v2.y;
178    return r;
179}
180
181extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
182    float3 r;
183    r.x = v1.x < v2.x ? v1.x : v2.x;
184    r.y = v1.y < v2.y ? v1.y : v2.y;
185    r.z = v1.z < v2.z ? v1.z : v2.z;
186    return r;
187}
188
189extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
190    float4 r;
191    r.x = v1.x < v2.x ? v1.x : v2.x;
192    r.y = v1.y < v2.y ? v1.y : v2.y;
193    r.z = v1.z < v2.z ? v1.z : v2.z;
194    r.w = v1.w < v2.w ? v1.w : v2.w;
195    return r;
196}
197
198extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
199    float2 r;
200    r.x = v1.x < v2 ? v1.x : v2;
201    r.y = v1.y < v2 ? v1.y : v2;
202    return r;
203}
204
205extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
206    float3 r;
207    r.x = v1.x < v2 ? v1.x : v2;
208    r.y = v1.y < v2 ? v1.y : v2;
209    r.z = v1.z < v2 ? v1.z : v2;
210    return r;
211}
212
213extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
214    float4 r;
215    r.x = v1.x < v2 ? v1.x : v2;
216    r.y = v1.y < v2 ? v1.y : v2;
217    r.z = v1.z < v2 ? v1.z : v2;
218    r.w = v1.w < v2 ? v1.w : v2;
219    return r;
220}
221
222
223/*
224 * MAX
225 */
226
/* Returns the larger of two char values (v2 when they are equal). */
extern char __attribute__((overloadable)) max(char v1, char v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
230
231extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
232    char2 r;
233    r.x = v1.x > v2.x ? v1.x : v2.x;
234    r.y = v1.y > v2.y ? v1.y : v2.y;
235    return r;
236}
237
238extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
239    char3 r;
240    r.x = v1.x > v2.x ? v1.x : v2.x;
241    r.y = v1.y > v2.y ? v1.y : v2.y;
242    r.z = v1.z > v2.z ? v1.z : v2.z;
243    return r;
244}
245
246extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
247    char4 r;
248    r.x = v1.x > v2.x ? v1.x : v2.x;
249    r.y = v1.y > v2.y ? v1.y : v2.y;
250    r.z = v1.z > v2.z ? v1.z : v2.z;
251    r.w = v1.w > v2.w ? v1.w : v2.w;
252    return r;
253}
254
/* Returns the larger of two short values (v2 when they are equal). */
extern short __attribute__((overloadable)) max(short v1, short v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
258
259extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
260    short2 r;
261    r.x = v1.x > v2.x ? v1.x : v2.x;
262    r.y = v1.y > v2.y ? v1.y : v2.y;
263    return r;
264}
265
266extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
267    short3 r;
268    r.x = v1.x > v2.x ? v1.x : v2.x;
269    r.y = v1.y > v2.y ? v1.y : v2.y;
270    r.z = v1.z > v2.z ? v1.z : v2.z;
271    return r;
272}
273
274extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
275    short4 r;
276    r.x = v1.x > v2.x ? v1.x : v2.x;
277    r.y = v1.y > v2.y ? v1.y : v2.y;
278    r.z = v1.z > v2.z ? v1.z : v2.z;
279    r.w = v1.w > v2.w ? v1.w : v2.w;
280    return r;
281}
282
/* Returns the larger of two int values (v2 when they are equal). */
extern int __attribute__((overloadable)) max(int v1, int v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
286
287extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
288    int2 r;
289    r.x = v1.x > v2.x ? v1.x : v2.x;
290    r.y = v1.y > v2.y ? v1.y : v2.y;
291    return r;
292}
293
294extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
295    int3 r;
296    r.x = v1.x > v2.x ? v1.x : v2.x;
297    r.y = v1.y > v2.y ? v1.y : v2.y;
298    r.z = v1.z > v2.z ? v1.z : v2.z;
299    return r;
300}
301
302extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
303    int4 r;
304    r.x = v1.x > v2.x ? v1.x : v2.x;
305    r.y = v1.y > v2.y ? v1.y : v2.y;
306    r.z = v1.z > v2.z ? v1.z : v2.z;
307    r.w = v1.w > v2.w ? v1.w : v2.w;
308    return r;
309}
310
311extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
312    return v1 > v2 ? v1 : v2;
313}
314
315extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
316    uchar2 r;
317    r.x = v1.x > v2.x ? v1.x : v2.x;
318    r.y = v1.y > v2.y ? v1.y : v2.y;
319    return r;
320}
321
322extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
323    uchar3 r;
324    r.x = v1.x > v2.x ? v1.x : v2.x;
325    r.y = v1.y > v2.y ? v1.y : v2.y;
326    r.z = v1.z > v2.z ? v1.z : v2.z;
327    return r;
328}
329
330extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
331    uchar4 r;
332    r.x = v1.x > v2.x ? v1.x : v2.x;
333    r.y = v1.y > v2.y ? v1.y : v2.y;
334    r.z = v1.z > v2.z ? v1.z : v2.z;
335    r.w = v1.w > v2.w ? v1.w : v2.w;
336    return r;
337}
338
339extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
340    return v1 > v2 ? v1 : v2;
341}
342
343extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
344    ushort2 r;
345    r.x = v1.x > v2.x ? v1.x : v2.x;
346    r.y = v1.y > v2.y ? v1.y : v2.y;
347    return r;
348}
349
350extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
351    ushort3 r;
352    r.x = v1.x > v2.x ? v1.x : v2.x;
353    r.y = v1.y > v2.y ? v1.y : v2.y;
354    r.z = v1.z > v2.z ? v1.z : v2.z;
355    return r;
356}
357
358extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
359    ushort4 r;
360    r.x = v1.x > v2.x ? v1.x : v2.x;
361    r.y = v1.y > v2.y ? v1.y : v2.y;
362    r.z = v1.z > v2.z ? v1.z : v2.z;
363    r.w = v1.w > v2.w ? v1.w : v2.w;
364    return r;
365}
366
367extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
368    return v1 > v2 ? v1 : v2;
369}
370
371extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
372    uint2 r;
373    r.x = v1.x > v2.x ? v1.x : v2.x;
374    r.y = v1.y > v2.y ? v1.y : v2.y;
375    return r;
376}
377
378extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
379    uint3 r;
380    r.x = v1.x > v2.x ? v1.x : v2.x;
381    r.y = v1.y > v2.y ? v1.y : v2.y;
382    r.z = v1.z > v2.z ? v1.z : v2.z;
383    return r;
384}
385
386extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
387    uint4 r;
388    r.x = v1.x > v2.x ? v1.x : v2.x;
389    r.y = v1.y > v2.y ? v1.y : v2.y;
390    r.z = v1.z > v2.z ? v1.z : v2.z;
391    r.w = v1.w > v2.w ? v1.w : v2.w;
392    return r;
393}
394
/* max() on floats forwards to fmax() and returns its result unchanged. */
extern float __attribute__((overloadable)) max(float v1, float v2) {
    const float larger = fmax(v1, v2);
    return larger;
}
398
399extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
400    return fmax(v1, v2);
401}
402
403extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
404    return fmax(v1, v2);
405}
406
407extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
408    return fmax(v1, v2);
409}
410
411extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
412    return fmax(v1, v2);
413}
414
415extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
416    return fmax(v1, v2);
417}
418
419extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
420    return fmax(v1, v2);
421}
422
423
424/*
425 * MIN
426 */
427
/* Returns the smaller of two int8_t values (v2 when they are equal). */
extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
431
432extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
433    char2 r;
434    r.x = v1.x < v2.x ? v1.x : v2.x;
435    r.y = v1.y < v2.y ? v1.y : v2.y;
436    return r;
437}
438
439extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
440    char3 r;
441    r.x = v1.x < v2.x ? v1.x : v2.x;
442    r.y = v1.y < v2.y ? v1.y : v2.y;
443    r.z = v1.z < v2.z ? v1.z : v2.z;
444    return r;
445}
446
447extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
448    char4 r;
449    r.x = v1.x < v2.x ? v1.x : v2.x;
450    r.y = v1.y < v2.y ? v1.y : v2.y;
451    r.z = v1.z < v2.z ? v1.z : v2.z;
452    r.w = v1.w < v2.w ? v1.w : v2.w;
453    return r;
454}
455
/* Returns the smaller of two int16_t values (v2 when they are equal). */
extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
459
460extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
461    short2 r;
462    r.x = v1.x < v2.x ? v1.x : v2.x;
463    r.y = v1.y < v2.y ? v1.y : v2.y;
464    return r;
465}
466
467extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
468    short3 r;
469    r.x = v1.x < v2.x ? v1.x : v2.x;
470    r.y = v1.y < v2.y ? v1.y : v2.y;
471    r.z = v1.z < v2.z ? v1.z : v2.z;
472    return r;
473}
474
475extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
476    short4 r;
477    r.x = v1.x < v2.x ? v1.x : v2.x;
478    r.y = v1.y < v2.y ? v1.y : v2.y;
479    r.z = v1.z < v2.z ? v1.z : v2.z;
480    r.w = v1.w < v2.w ? v1.w : v2.w;
481    return r;
482}
483
/* Returns the smaller of two int32_t values (v2 when they are equal). */
extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
487
488extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
489    int2 r;
490    r.x = v1.x < v2.x ? v1.x : v2.x;
491    r.y = v1.y < v2.y ? v1.y : v2.y;
492    return r;
493}
494
495extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
496    int3 r;
497    r.x = v1.x < v2.x ? v1.x : v2.x;
498    r.y = v1.y < v2.y ? v1.y : v2.y;
499    r.z = v1.z < v2.z ? v1.z : v2.z;
500    return r;
501}
502
503extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
504    int4 r;
505    r.x = v1.x < v2.x ? v1.x : v2.x;
506    r.y = v1.y < v2.y ? v1.y : v2.y;
507    r.z = v1.z < v2.z ? v1.z : v2.z;
508    r.w = v1.w < v2.w ? v1.w : v2.w;
509    return r;
510}
511
512extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
513    return v1 < v2 ? v1 : v2;
514}
515
516extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
517    uchar2 r;
518    r.x = v1.x < v2.x ? v1.x : v2.x;
519    r.y = v1.y < v2.y ? v1.y : v2.y;
520    return r;
521}
522
523extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
524    uchar3 r;
525    r.x = v1.x < v2.x ? v1.x : v2.x;
526    r.y = v1.y < v2.y ? v1.y : v2.y;
527    r.z = v1.z < v2.z ? v1.z : v2.z;
528    return r;
529}
530
531extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
532    uchar4 r;
533    r.x = v1.x < v2.x ? v1.x : v2.x;
534    r.y = v1.y < v2.y ? v1.y : v2.y;
535    r.z = v1.z < v2.z ? v1.z : v2.z;
536    r.w = v1.w < v2.w ? v1.w : v2.w;
537    return r;
538}
539
540extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
541    return v1 < v2 ? v1 : v2;
542}
543
544extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
545    ushort2 r;
546    r.x = v1.x < v2.x ? v1.x : v2.x;
547    r.y = v1.y < v2.y ? v1.y : v2.y;
548    return r;
549}
550
551extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
552    ushort3 r;
553    r.x = v1.x < v2.x ? v1.x : v2.x;
554    r.y = v1.y < v2.y ? v1.y : v2.y;
555    r.z = v1.z < v2.z ? v1.z : v2.z;
556    return r;
557}
558
559extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
560    ushort4 r;
561    r.x = v1.x < v2.x ? v1.x : v2.x;
562    r.y = v1.y < v2.y ? v1.y : v2.y;
563    r.z = v1.z < v2.z ? v1.z : v2.z;
564    r.w = v1.w < v2.w ? v1.w : v2.w;
565    return r;
566}
567
568extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
569    return v1 < v2 ? v1 : v2;
570}
571
572extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
573    uint2 r;
574    r.x = v1.x < v2.x ? v1.x : v2.x;
575    r.y = v1.y < v2.y ? v1.y : v2.y;
576    return r;
577}
578
579extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
580    uint3 r;
581    r.x = v1.x < v2.x ? v1.x : v2.x;
582    r.y = v1.y < v2.y ? v1.y : v2.y;
583    r.z = v1.z < v2.z ? v1.z : v2.z;
584    return r;
585}
586
587extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
588    uint4 r;
589    r.x = v1.x < v2.x ? v1.x : v2.x;
590    r.y = v1.y < v2.y ? v1.y : v2.y;
591    r.z = v1.z < v2.z ? v1.z : v2.z;
592    r.w = v1.w < v2.w ? v1.w : v2.w;
593    return r;
594}
595
/* min() on floats forwards to fmin() and returns its result unchanged. */
extern float __attribute__((overloadable)) min(float v1, float v2) {
    const float smaller = fmin(v1, v2);
    return smaller;
}
599
600extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
601    return fmin(v1, v2);
602}
603
604extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
605    return fmin(v1, v2);
606}
607
608extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
609    return fmin(v1, v2);
610}
611
612extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
613    return fmin(v1, v2);
614}
615
616extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
617    return fmin(v1, v2);
618}
619
620extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
621    return fmin(v1, v2);
622}
623
624/*
625 * YUV
626 */
627
628extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
629    short Y = ((short)y) - 16;
630    short U = ((short)u) - 128;
631    short V = ((short)v) - 128;
632
633    short4 p;
634    p.r = (Y * 298 + V * 409 + 128) >> 8;
635    p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
636    p.b = (Y * 298 + U * 516 + 128) >> 8;
637    p.a = 255;
638    p.r = rsClamp(p.r, (short)0, (short)255);
639    p.g = rsClamp(p.g, (short)0, (short)255);
640    p.b = rsClamp(p.b, (short)0, (short)255);
641
642    return convert_uchar4(p);
643}
644
645static float4 yuv_U_values = {0.f, -0.392f * 0.003921569f, +2.02 * 0.003921569f, 0.f};
646static float4 yuv_V_values = {1.603f * 0.003921569f, -0.815f * 0.003921569f, 0.f, 0.f};
647
648extern float4 __attribute__((overloadable)) rsYuvToRGBA_float4(uchar y, uchar u, uchar v) {
649    float4 color = (float)y * 0.003921569f;
650    float4 fU = ((float)u) - 128.f;
651    float4 fV = ((float)v) - 128.f;
652
653    color += fU * yuv_U_values;
654    color += fV * yuv_V_values;
655    color = clamp(color, 0.f, 1.f);
656    return color;
657}
658
659
/*
 * half_recip
 */
663
664extern float2 __attribute__((overloadable)) half_recip(float2 v) {
665    return ((float2) 1.f) / v;
666}
667
668extern float3 __attribute__((overloadable)) half_recip(float3 v) {
669    return ((float3) 1.f) / v;
670}
671
672extern float4 __attribute__((overloadable)) half_recip(float4 v) {
673    return ((float4) 1.f) / v;
674}
675
676
677
678/*
679 * half_rsqrt
680 */
681
/* Reciprocal square root: 1 divided by sqrt(v). */
extern float __attribute__((overloadable)) half_rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
685
686extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
687    float2 r;
688    r.x = half_rsqrt(v.x);
689    r.y = half_rsqrt(v.y);
690    return r;
691}
692
693extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
694    float3 r;
695    r.x = half_rsqrt(v.x);
696    r.y = half_rsqrt(v.y);
697    r.z = half_rsqrt(v.z);
698    return r;
699}
700
701extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
702    float4 r;
703    r.x = half_rsqrt(v.x);
704    r.y = half_rsqrt(v.y);
705    r.z = half_rsqrt(v.z);
706    r.w = half_rsqrt(v.w);
707    return r;
708}
709
710/**
711 * matrix ops
712 */
713
714extern float4 __attribute__((overloadable))
715rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
716    float4 ret;
717    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
718    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
719    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
720    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
721    return ret;
722}
723
724extern float4 __attribute__((overloadable))
725rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
726    float4 ret;
727    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
728    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
729    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
730    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
731    return ret;
732}
733
734extern float4 __attribute__((overloadable))
735rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
736    float4 ret;
737    ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
738    ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
739    ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
740    ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
741    return ret;
742}
743
744extern float3 __attribute__((overloadable))
745rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
746    float3 ret;
747    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
748    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
749    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
750    return ret;
751}
752
753extern float3 __attribute__((overloadable))
754rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
755    float3 ret;
756    ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
757    ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
758    ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
759    return ret;
760}
761
762/**
763 * Pixel Ops
764 */
765extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
766{
767    uchar4 c;
768    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
769    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
770    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
771    c.w = 255;
772    return c;
773}
774
775extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
776{
777    uchar4 c;
778    c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
779    c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
780    c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
781    c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
782    return c;
783}
784
785extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
786{
787    color *= 255.f;
788    color += 0.5f;
789    color = clamp(color, 0.f, 255.f);
790    uchar4 c = {color.x, color.y, color.z, 255};
791    return c;
792}
793
794extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
795{
796    color *= 255.f;
797    color += 0.5f;
798    color = clamp(color, 0.f, 255.f);
799    uchar4 c = {color.x, color.y, color.z, color.w};
800    return c;
801}
802