1#include "rs_core.rsh"
2#include "rs_structs.h"
3
4#include "rsCpuCoreRuntime.h"
5
6extern float __attribute__((overloadable)) rsFrac(float v) {
7    int i = (int)floor(v);
8    return fmin(v - i, 0x1.fffffep-1f);
9}
10
11/* Function declarations from libRS */
12extern float4 __attribute__((overloadable)) convert_float4(uchar4 c);
13
14/* Implementation of Core Runtime */
15
16extern float4 rsUnpackColor8888(uchar4 c)
17{
18    return convert_float4(c) * 0.003921569f;
19}
20
21
22extern float __attribute__((overloadable)) rsClamp(float v, float l, float h) {
23    return clamp(v, l, h);
24}
25extern char __attribute__((overloadable)) rsClamp(char v, char l, char h) {
26    return clamp(v, l, h);
27}
28extern uchar __attribute__((overloadable)) rsClamp(uchar v, uchar l, uchar h) {
29    return clamp(v, l, h);
30}
31extern short __attribute__((overloadable)) rsClamp(short v, short l, short h) {
32    return clamp(v, l, h);
33}
34extern ushort __attribute__((overloadable)) rsClamp(ushort v, ushort l, ushort h) {
35    return clamp(v, l, h);
36}
37extern int __attribute__((overloadable)) rsClamp(int v, int l, int h) {
38    return clamp(v, l, h);
39}
40extern uint __attribute__((overloadable)) rsClamp(uint v, uint l, uint h) {
41    return clamp(v, l, h);
42}
43
/*
 * Atomic compare-and-swap on a signed 32-bit value: stores newValue into *ptr
 * only if *ptr currently equals expectedValue. Returns the value *ptr held
 * before the operation.
 */
extern int32_t __attribute__((overloadable)) rsAtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
    const int32_t previous = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
    return previous;
}
47
/*
 * Atomic compare-and-swap on an unsigned 32-bit value: stores newValue into
 * *ptr only if *ptr currently equals expectedValue. Returns the value *ptr
 * held before the operation.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicCas(volatile uint32_t *ptr, uint32_t expectedValue, uint32_t newValue) {
    const uint32_t previous = __sync_val_compare_and_swap(ptr, expectedValue, newValue);
    return previous;
}
51
/* Atomically adds 1 to *ptr and returns the value it held beforehand. */
extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile int32_t *ptr) {
    const int32_t before = __sync_fetch_and_add(ptr, 1);
    return before;
}
55
/*
 * Atomically adds 1 to the unsigned value at *ptr. The prior value is
 * returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicInc(volatile uint32_t *ptr) {
    const uint32_t before = __sync_fetch_and_add(ptr, 1);
    return before;
}
59
/* Atomically subtracts 1 from *ptr and returns the value it held beforehand. */
extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile int32_t *ptr) {
    const int32_t before = __sync_fetch_and_sub(ptr, 1);
    return before;
}
63
/*
 * Atomically subtracts 1 from the unsigned value at *ptr. The prior value is
 * returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicDec(volatile uint32_t *ptr) {
    const uint32_t before = __sync_fetch_and_sub(ptr, 1);
    return before;
}
67
/* Atomically adds value to *ptr and returns the value *ptr held beforehand. */
extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_add(ptr, value);
    return before;
}
71
/*
 * Atomically adds value to the unsigned value at *ptr. The prior value is
 * returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicAdd(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_add(ptr, value);
    return before;
}
75
/* Atomically subtracts value from *ptr and returns the value *ptr held beforehand. */
extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_sub(ptr, value);
    return before;
}
79
/*
 * Atomically subtracts value from the unsigned value at *ptr. The prior value
 * is returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicSub(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_sub(ptr, value);
    return before;
}
83
/* Atomically replaces *ptr with (*ptr & value) and returns the prior value. */
extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_and(ptr, value);
    return before;
}
87
/*
 * Atomically replaces the unsigned value at *ptr with (*ptr & value). The
 * prior value is returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicAnd(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_and(ptr, value);
    return before;
}
91
/* Atomically replaces *ptr with (*ptr | value) and returns the prior value. */
extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_or(ptr, value);
    return before;
}
95
/*
 * Atomically replaces the unsigned value at *ptr with (*ptr | value). The
 * prior value is returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicOr(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_or(ptr, value);
    return before;
}
99
/* Atomically replaces *ptr with (*ptr ^ value) and returns the prior value. */
extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile int32_t *ptr, int32_t value) {
    const int32_t before = __sync_fetch_and_xor(ptr, value);
    return before;
}
103
/*
 * Atomically replaces the unsigned value at *ptr with (*ptr ^ value). The
 * prior value is returned, converted to int32_t by the return type.
 */
extern int32_t __attribute__((overloadable)) rsAtomicXor(volatile uint32_t *ptr, uint32_t value) {
    const uint32_t before = __sync_fetch_and_xor(ptr, value);
    return before;
}
107
108extern uint32_t __attribute__((overloadable)) min(uint32_t, uint32_t);
109extern int32_t __attribute__((overloadable)) min(int32_t, int32_t);
110extern uint32_t __attribute__((overloadable)) max(uint32_t, uint32_t);
111extern int32_t __attribute__((overloadable)) max(int32_t, int32_t);
112
/*
 * Atomically stores min(value, *ptr) into *ptr (unsigned).
 *
 * Reads the current value, computes the minimum, and tries to publish it
 * with a compare-and-swap. If the swap reports a different prior value than
 * the one read, the sequence is retried. Returns the value read on the
 * successful attempt.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicMin(volatile uint32_t *ptr, uint32_t value) {
    for (;;) {
        const uint32_t observed = *ptr;
        const uint32_t candidate = min(value, observed);
        if (__sync_val_compare_and_swap(ptr, observed, candidate) == observed) {
            return observed;
        }
    }
}
122
/*
 * Atomically stores min(value, *ptr) into *ptr (signed).
 *
 * Reads the current value, computes the minimum, and tries to publish it
 * with a compare-and-swap. If the swap reports a different prior value than
 * the one read, the sequence is retried. Returns the value read on the
 * successful attempt.
 */
extern int32_t __attribute__((overloadable)) rsAtomicMin(volatile int32_t *ptr, int32_t value) {
    for (;;) {
        const int32_t observed = *ptr;
        const int32_t candidate = min(value, observed);
        if (__sync_val_compare_and_swap(ptr, observed, candidate) == observed) {
            return observed;
        }
    }
}
132
/*
 * Atomically stores max(value, *ptr) into *ptr (unsigned).
 *
 * Reads the current value, computes the maximum, and tries to publish it
 * with a compare-and-swap. If the swap reports a different prior value than
 * the one read, the sequence is retried. Returns the value read on the
 * successful attempt.
 */
extern uint32_t __attribute__((overloadable)) rsAtomicMax(volatile uint32_t *ptr, uint32_t value) {
    for (;;) {
        const uint32_t observed = *ptr;
        const uint32_t candidate = max(value, observed);
        if (__sync_val_compare_and_swap(ptr, observed, candidate) == observed) {
            return observed;
        }
    }
}
142
/*
 * Atomically stores max(value, *ptr) into *ptr (signed).
 *
 * Reads the current value, computes the maximum, and tries to publish it
 * with a compare-and-swap. If the swap reports a different prior value than
 * the one read, the sequence is retried. Returns the value read on the
 * successful attempt.
 */
extern int32_t __attribute__((overloadable)) rsAtomicMax(volatile int32_t *ptr, int32_t value) {
    for (;;) {
        const int32_t observed = *ptr;
        const int32_t candidate = max(value, observed);
        if (__sync_val_compare_and_swap(ptr, observed, candidate) == observed) {
            return observed;
        }
    }
}
152
153
154
/*
 * Upper bound paired with rand() below. Neither is referenced by live code
 * in the visible part of this file.
 */
#define RAND_MAX 0x7fffffff
extern int32_t rand();
157
158
159
/*
 * Two-bound float overload of rsRand. Only the declaration lives here: this
 * file provides no body for it, so the definition must come from elsewhere.
 */
extern float __attribute__((overloadable)) rsRand(float min, float max);
167
168extern float __attribute__((overloadable)) rsRand(float max) {
169    return rsRand(0.f, max);
170    //float r = (float)rand();
171    //r *= max;
172    //r /= RAND_MAX;
173    //return r;
174}
175
/*
 * Single-bound int overload: converts max to float, calls the float
 * overload, and converts the result back to int with a cast.
 */
extern int __attribute__((overloadable)) rsRand(int max) {
    const float upper = (float)max;
    const float sample = rsRand(upper);
    return (int)sample;
}
179
/*
 * Two-bound int overload: converts both bounds to float, calls the float
 * overload, and converts the result back to int with a cast.
 */
extern int __attribute__((overloadable)) rsRand(int min, int max) {
    const float lower = (float)min;
    const float upper = (float)max;
    const float sample = rsRand(lower, upper);
    return (int)sample;
}
183
184extern uint32_t __attribute__((overloadable)) rsGetArray0(rs_kernel_context ctxt) {
185    return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[0];
186}
187
188extern uint32_t __attribute__((overloadable)) rsGetArray1(rs_kernel_context ctxt) {
189    return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[1];
190}
191
192extern uint32_t __attribute__((overloadable)) rsGetArray2(rs_kernel_context ctxt) {
193    return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[2];
194}
195
196extern uint32_t __attribute__((overloadable)) rsGetArray3(rs_kernel_context ctxt) {
197    return ((struct RsExpandKernelDriverInfo *)ctxt)->current.array[3];
198}
199
200extern rs_allocation_cubemap_face __attribute__((overloadable)) rsGetFace(rs_kernel_context ctxt) {
201    return (rs_allocation_cubemap_face)(((struct RsExpandKernelDriverInfo *)ctxt)->current.face);
202}
203
204extern uint32_t __attribute__((overloadable)) rsGetLod(rs_kernel_context ctxt) {
205    return ((struct RsExpandKernelDriverInfo *)ctxt)->current.lod;
206}
207
208extern uint32_t __attribute__((overloadable)) rsGetDimX(rs_kernel_context ctxt) {
209    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.x;
210}
211
212extern uint32_t __attribute__((overloadable)) rsGetDimY(rs_kernel_context ctxt) {
213    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.y;
214}
215
216extern uint32_t __attribute__((overloadable)) rsGetDimZ(rs_kernel_context ctxt) {
217    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.z;
218}
219
220extern uint32_t __attribute__((overloadable)) rsGetDimArray0(rs_kernel_context ctxt) {
221    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[0];
222}
223
224extern uint32_t __attribute__((overloadable)) rsGetDimArray1(rs_kernel_context ctxt) {
225    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[1];
226}
227
228extern uint32_t __attribute__((overloadable)) rsGetDimArray2(rs_kernel_context ctxt) {
229    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[2];
230}
231
232extern uint32_t __attribute__((overloadable)) rsGetDimArray3(rs_kernel_context ctxt) {
233    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.array[3];
234}
235
236extern bool __attribute__((overloadable)) rsGetDimHasFaces(rs_kernel_context ctxt) {
237    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.face != 0;
238}
239
240extern uint32_t __attribute__((overloadable)) rsGetDimLod(rs_kernel_context ctxt) {
241    return ((struct RsExpandKernelDriverInfo *)ctxt)->dim.lod;
242}
243
244#define PRIM_DEBUG(T)                               \
245extern void __attribute__((overloadable)) rsDebug(const char *, const T *);     \
246void __attribute__((overloadable)) rsDebug(const char *txt, T val) {            \
247    rsDebug(txt, &val);                                                         \
248}
249
250PRIM_DEBUG(char2)
251PRIM_DEBUG(char3)
252PRIM_DEBUG(char4)
253PRIM_DEBUG(uchar2)
254PRIM_DEBUG(uchar3)
255PRIM_DEBUG(uchar4)
256PRIM_DEBUG(short2)
257PRIM_DEBUG(short3)
258PRIM_DEBUG(short4)
259PRIM_DEBUG(ushort2)
260PRIM_DEBUG(ushort3)
261PRIM_DEBUG(ushort4)
262PRIM_DEBUG(int2)
263PRIM_DEBUG(int3)
264PRIM_DEBUG(int4)
265PRIM_DEBUG(uint2)
266PRIM_DEBUG(uint3)
267PRIM_DEBUG(uint4)
268PRIM_DEBUG(long2)
269PRIM_DEBUG(long3)
270PRIM_DEBUG(long4)
271PRIM_DEBUG(ulong2)
272PRIM_DEBUG(ulong3)
273PRIM_DEBUG(ulong4)
274PRIM_DEBUG(float2)
275PRIM_DEBUG(float3)
276PRIM_DEBUG(float4)
277PRIM_DEBUG(double2)
278PRIM_DEBUG(double3)
279PRIM_DEBUG(double4)
280
281#undef PRIM_DEBUG
282
283// Convert the half values to float before handing off to the driver.  This
284// eliminates the need in the driver to properly support the half datatype
285// (either by adding compiler flags for half or link against compiler_rt).
286// Also, pass the bit-equivalent ushort to be printed.
287extern void __attribute__((overloadable)) rsDebug(const char *s, float f,
288                                                  ushort us);
289extern void __attribute__((overloadable)) rsDebug(const char *s, half h) {
290    rsDebug(s, (float) h, *(ushort *) &h);
291}
292
293extern void __attribute__((overloadable)) rsDebug(const char *s,
294                                                  const float2 *f,
295                                                  const ushort2 *us);
296extern void __attribute__((overloadable)) rsDebug(const char *s, half2 h2) {
297    float2 f = convert_float2(h2);
298    rsDebug(s, &f, (ushort2 *) &h2);
299}
300
301extern void __attribute__((overloadable)) rsDebug(const char *s,
302                                                  const float3 *f,
303                                                  const ushort3 *us);
304extern void __attribute__((overloadable)) rsDebug(const char *s, half3 h3) {
305    float3 f = convert_float3(h3);
306    rsDebug(s, &f, (ushort3 *) &h3);
307}
308
309extern void __attribute__((overloadable)) rsDebug(const char *s,
310                                                  const float4 *f,
311                                                  const ushort4 *us);
312extern void __attribute__((overloadable)) rsDebug(const char *s, half4 h4) {
313    float4 f = convert_float4(h4);
314    rsDebug(s, &f, (ushort4 *) &h4);
315}
316