1#include "rs_core.rsh"
2#include "rs_graphics.rsh"
3#include "rs_structs.h"
4
5/**
6* Allocation sampling
7*/
8static const void * __attribute__((overloadable))
9        getElementAt(rs_allocation a, uint32_t x, uint32_t lod) {
10    Allocation_t *alloc = (Allocation_t *)a.p;
11    const Type_t *type = (const Type_t*)alloc->mHal.state.type;
12    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
13
14    const uint32_t offset = type->mHal.state.lodOffset[lod];
15    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
16
17    return &p[offset + eSize * x];
18}
19
20static const void * __attribute__((overloadable))
21        getElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t lod) {
22    Allocation_t *alloc = (Allocation_t *)a.p;
23    const Type_t *type = (const Type_t*)alloc->mHal.state.type;
24    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
25
26    const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
27    const uint32_t offset = type->mHal.state.lodOffset[lod];
28    uint32_t stride;
29    if(lod == 0) {
30        stride = alloc->mHal.drvState.stride;
31    } else {
32        stride = type->mHal.state.lodDimX[lod] * eSize;
33    }
34
35    return &p[offset + (eSize * x) + (y * stride)];
36}
37
38static const void * __attribute__((overloadable))
39        getElementAt(rs_allocation a, uint2 uv, uint32_t lod) {
40    return getElementAt(a, uv.x, uv.y, lod);
41}
42
43static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
44    if (wrap == RS_SAMPLER_WRAP) {
45        coord = coord % size;
46        if (coord < 0) {
47            coord += size;
48        }
49    }
50    return (uint32_t)max(0, min(coord, size - 1));
51}
52
// RGB565 unpacking helpers, adapted from SkBitmap.
// A packed 16-bit pixel holds, from most to least significant bit,
// 5 bits of red, 6 bits of green and 5 bits of blue.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each channel inside the packed value.
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_B16_SHIFT    0

// Masks selecting one channel after it has been shifted down to bit 0.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract the raw 5- or 6-bit channel value from a packed pixel.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Expand a 5-bit red value to 8 bits by shifting it up and replicating its
// top bits into the vacated low bits.
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned shiftedUp = r << (8 - SK_R16_BITS);
    const unsigned lowFill = r >> (2 * SK_R16_BITS - 8);
    return shiftedUp | lowFill;
}

// Expand a 6-bit green value to 8 bits (same bit-replication scheme).
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned shiftedUp = g << (8 - SK_G16_BITS);
    const unsigned lowFill = g >> (2 * SK_G16_BITS - 8);
    return shiftedUp | lowFill;
}

// Expand a 5-bit blue value to 8 bits (same bit-replication scheme).
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned shiftedUp = b << (8 - SK_B16_BITS);
    const unsigned lowFill = b >> (2 * SK_B16_BITS - 8);
    return shiftedUp | lowFill;
}

// Packed 565 pixel -> individual 8-bit channel values.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
85
86static float3 getFrom565(uint16_t color) {
87    float3 result;
88    result.x = (float)SkPacked16ToR32(color);
89    result.y = (float)SkPacked16ToG32(color);
90    result.z = (float)SkPacked16ToB32(color);
91    return result;
92}
93
/*
 * Generates getSample<vecsize>() for two-tap filtering: fetches the elements
 * at iPixel and next, converts each from intype to outtype with convert, and
 * returns their sum weighted by weights.x and weights.y.
 */
#define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert) \
        static outtype __attribute__((overloadable)) \
                getSample##vecsize(rs_allocation a, float2 weights, \
                                   uint32_t iPixel, uint32_t next, uint32_t lod) { \
            const intype *texel0 = (const intype *)getElementAt(a, iPixel, lod); \
            const intype *texel1 = (const intype *)getElementAt(a, next, lod); \
            const outtype value0 = convert(*texel0); \
            const outtype value1 = convert(*texel1); \
            return value0 * weights.x + value1 * weights.y; \
        }
/*
 * Generates getSample<vecsize>() for four-tap filtering. The taps are, in
 * order, (iPixel.x, iPixel.y), (next.x, iPixel.y), (iPixel.x, next.y) and
 * (next.x, next.y); they are weighted by weights.x, .y, .z and .w.
 */
#define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert) \
        static outtype __attribute__((overloadable)) \
                    getSample##vecsize(rs_allocation a, float4 weights, \
                                       uint2 iPixel, uint2 next, uint32_t lod) { \
            const intype *texel00 = (const intype *)getElementAt(a, iPixel.x, iPixel.y, lod); \
            const intype *texel10 = (const intype *)getElementAt(a, next.x, iPixel.y, lod); \
            const intype *texel01 = (const intype *)getElementAt(a, iPixel.x, next.y, lod); \
            const intype *texel11 = (const intype *)getElementAt(a, next.x, next.y, lod); \
            const outtype value00 = convert(*texel00); \
            const outtype value10 = convert(*texel10); \
            const outtype value01 = convert(*texel01); \
            const outtype value11 = convert(*texel11); \
            return value00 * weights.x + value10 * weights.y + \
                   value01 * weights.z + value11 * weights.w; \
        }
118
119SAMPLE_1D_FUNC(1, uchar, float, (float))
120SAMPLE_1D_FUNC(2, uchar2, float2, convert_float2)
121SAMPLE_1D_FUNC(3, uchar3, float3, convert_float3)
122SAMPLE_1D_FUNC(4, uchar4, float4, convert_float4)
123SAMPLE_1D_FUNC(565, uint16_t, float3, getFrom565)
124
125SAMPLE_2D_FUNC(1, uchar, float, (float))
126SAMPLE_2D_FUNC(2, uchar2, float2, convert_float2)
127SAMPLE_2D_FUNC(3, uchar3, float3, convert_float3)
128SAMPLE_2D_FUNC(4, uchar4, float4, convert_float4)
129SAMPLE_2D_FUNC(565, uint16_t, float3, getFrom565)
130
// Shared body of the rsSample() overloads that take an explicit LOD; it is
// identical for every coordinate dimensionality. The expansion site must have
// `a` (rs_allocation), `s` (rs_sampler), `uv` (float or float2) and `lod`
// (float) in scope.
//
// Behavior:
//  - Elements that are RS_KIND_USER, or whose data type is neither
//    RS_TYPE_UNSIGNED_8 nor RS_TYPE_UNSIGNED_5_6_5, yield a zero float4.
//  - lod <= 0 (or NaN) samples level 0 with the magnification filter.
//  - Otherwise the minification filter picks the path; lod is clamped to the
//    last available level before any float-to-integer conversion.
#define SAMPLE_FUNC_BODY() \
{ \
    rs_element elem = rsAllocationGetElement(a); \
    rs_data_kind dk = rsElementGetDataKind(elem); \
    rs_data_type dt = rsElementGetDataType(elem); \
\
    if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) { \
        float4 zero = {0.0f, 0.0f, 0.0f, 0.0f}; \
        return zero; \
    } \
\
    uint32_t vecSize = rsElementGetVectorSize(elem); \
    Allocation_t *alloc = (Allocation_t *)a.p; \
    const Type_t *type = (const Type_t*)alloc->mHal.state.type; \
\
    rs_sampler_value sampleMin = rsSamplerGetMinification(s); \
    rs_sampler_value sampleMag = rsSamplerGetMagnification(s); \
\
    /* Magnification. Written as !(lod > 0) so that a NaN lod also lands */ \
    /* here instead of reaching the integer conversions below. */ \
    if (!(lod > 0.0f)) { \
        if (sampleMag == RS_SAMPLER_NEAREST) { \
            return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \
        } \
        return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0); \
    } \
\
    /* Clamp once, up front: converting an out-of-range float to uint32_t */ \
    /* is undefined, and lodCount - 1 must not wrap if lodCount is 0. */ \
    const uint32_t lodCount = type->mHal.state.lodCount; \
    const uint32_t maxLOD = (lodCount > 0) ? (lodCount - 1) : 0; \
    lod = min(lod, (float)maxLOD); \
\
    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) { \
        uint32_t nearestLOD = (uint32_t)round(lod); \
        return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, nearestLOD); \
    } \
\
    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) { \
        /* Blend the two levels that bracket lod. */ \
        uint32_t lod0 = (uint32_t)floor(lod); \
        uint32_t lod1 = (uint32_t)ceil(lod); \
        float4 sample0 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0); \
        float4 sample1 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1); \
        float frac = lod - (float)lod0; \
        return sample0 * (1.0f - frac) + sample1 * frac; \
    } \
\
    /* Non-mipmapped linear minification filters level 0; previously this */ \
    /* case fell through to nearest sampling. */ \
    if (sampleMin == RS_SAMPLER_LINEAR) { \
        return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0); \
    } \
\
    return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \
} // End of SAMPLE_FUNC_BODY
178
// Shared body of the getBilinearSample() overloads. The expansion site must
// have `a`, `weights`, `iPixel`, `next`, `vecSize`, `dt` and `lod` in scope.
//
// Returns the weighted sample with every channel normalized from 0..255 to
// 0..1. Channels the element format does not provide are returned as 0 (they
// were previously left uninitialized).
// NOTE(review): confirm whether alpha should instead default to 1.0 for
// formats without an alpha channel.
#define BILINEAR_SAMPLE_BODY() \
{ \
    float4 result = {0.0f, 0.0f, 0.0f, 0.0f}; \
    if (dt == RS_TYPE_UNSIGNED_5_6_5) { \
        /* getSample565() yields 0..255 per channel; scale by 1/255 like the */ \
        /* 8-bit paths below so that all formats share one output range. */ \
        result.xyz = getSample565(a, weights, iPixel, next, lod) * 0.003921569f; \
        return result; \
    } \
\
    switch(vecSize) { \
    case 1: \
        result.x = getSample1(a, weights, iPixel, next, lod); \
        break; \
    case 2: \
        result.xy = getSample2(a, weights, iPixel, next, lod); \
        break; \
    case 3: \
        result.xyz = getSample3(a, weights, iPixel, next, lod); \
        break; \
    case 4: \
        result = getSample4(a, weights, iPixel, next, lod); \
        break; \
    default: \
        /* Unsupported vector size: result stays all zero. */ \
        break; \
    } \
\
    /* 0.003921569f == 1/255 */ \
    return result * 0.003921569f; \
} // End of BILINEAR_SAMPLE_BODY
205
// Shared body of the getNearestSample() overloads. The expansion site must
// have `a`, `iPixel`, `vecSize`, `dt` and `lod` in scope.
//
// Returns the single element at iPixel with every channel normalized from
// 0..255 to 0..1. Channels the element format does not provide are returned
// as 0 (they were previously left uninitialized).
// NOTE(review): confirm whether alpha should instead default to 1.0 for
// formats without an alpha channel.
#define NEAREST_SAMPLE_BODY() \
{ \
    float4 result = {0.0f, 0.0f, 0.0f, 0.0f}; \
    if (dt == RS_TYPE_UNSIGNED_5_6_5) { \
        /* getFrom565() yields 0..255 per channel; scale by 1/255 like the */ \
        /* 8-bit paths below so that all formats share one output range. */ \
        result.xyz = getFrom565(*(const uint16_t *)getElementAt(a, iPixel, lod)) \
                     * 0.003921569f; \
        return result; \
    } \
\
    switch(vecSize) { \
    case 1: \
        result.x = (float)(*((const uchar *)getElementAt(a, iPixel, lod))); \
        break; \
    case 2: \
        result.xy = convert_float2(*((const uchar2 *)getElementAt(a, iPixel, lod))); \
        break; \
    case 3: \
        result.xyz = convert_float3(*((const uchar3 *)getElementAt(a, iPixel, lod))); \
        break; \
    case 4: \
        result = convert_float4(*((const uchar4 *)getElementAt(a, iPixel, lod))); \
        break; \
    default: \
        /* Unsupported vector size: result stays all zero. */ \
        break; \
    } \
\
    /* 0.003921569f == 1/255 */ \
    return result * 0.003921569f; \
} // End of NEAREST_SAMPLE_BODY
232
233static float4 __attribute__((overloadable))
234        getBilinearSample(rs_allocation a, float2 weights,
235                          uint32_t iPixel, uint32_t next,
236                          uint32_t vecSize, rs_data_type dt, uint32_t lod) {
237    BILINEAR_SAMPLE_BODY()
238}
239
240static float4 __attribute__((overloadable))
241        getBilinearSample(rs_allocation a, float4 weights,
242                          uint2 iPixel, uint2 next,
243                          uint32_t vecSize, rs_data_type dt, uint32_t lod) {
244    BILINEAR_SAMPLE_BODY()
245}
246
247static float4  __attribute__((overloadable))
248        getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize,
249                         rs_data_type dt, uint32_t lod) {
250    NEAREST_SAMPLE_BODY()
251}
252
253static float4  __attribute__((overloadable))
254        getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize,
255                         rs_data_type dt, uint32_t lod) {
256    NEAREST_SAMPLE_BODY()
257}
258
259static float4 __attribute__((overloadable))
260        sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
261                               uint32_t vecSize, rs_data_type dt,
262                               rs_sampler s,
263                               float uv, uint32_t lod) {
264    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
265    int32_t sourceW = type->mHal.state.lodDimX[lod];
266    float pixelUV = uv * (float)(sourceW);
267    int32_t iPixel = (int32_t)(pixelUV);
268    float frac = pixelUV - (float)iPixel;
269
270    if (frac < 0.5f) {
271        iPixel -= 1;
272        frac += 0.5f;
273    } else {
274        frac -= 0.5f;
275    }
276
277    float oneMinusFrac = 1.0f - frac;
278
279    float2 weights;
280    weights.x = oneMinusFrac;
281    weights.y = frac;
282
283    uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
284    uint32_t location = wrapI(wrapS, iPixel, sourceW);
285
286    return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
287}
288
289static float4 __attribute__((overloadable))
290        sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
291                                uint32_t vecSize, rs_data_type dt,
292                                rs_sampler s,
293                                float uv, uint32_t lod) {
294    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
295    int32_t sourceW = type->mHal.state.lodDimX[lod];
296    int32_t iPixel = (int32_t)(uv * (float)(sourceW));
297    uint32_t location = wrapI(wrapS, iPixel, sourceW);
298
299    return getNearestSample(a, location, vecSize, dt, lod);
300}
301
302static float4 __attribute__((overloadable))
303        sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
304                               uint32_t vecSize, rs_data_type dt,
305                               rs_sampler s,
306                               float2 uv, uint32_t lod) {
307    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
308    rs_sampler_value wrapT = rsSamplerGetWrapT(s);
309
310    int32_t sourceW = type->mHal.state.lodDimX[lod];
311    int32_t sourceH = type->mHal.state.lodDimY[lod];
312
313    float2 dimF;
314    dimF.x = (float)(sourceW);
315    dimF.y = (float)(sourceH);
316    float2 pixelUV = uv * dimF;
317    int2 iPixel = convert_int2(pixelUV);
318
319    float2 frac = pixelUV - convert_float2(iPixel);
320
321    if (frac.x < 0.5f) {
322        iPixel.x -= 1;
323        frac.x += 0.5f;
324    } else {
325        frac.x -= 0.5f;
326    }
327    if (frac.y < 0.5f) {
328        iPixel.y -= 1;
329        frac.y += 0.5f;
330    } else {
331        frac.y -= 0.5f;
332    }
333    float2 oneMinusFrac = 1.0f - frac;
334
335    float4 weights;
336    weights.x = oneMinusFrac.x * oneMinusFrac.y;
337    weights.y = frac.x * oneMinusFrac.y;
338    weights.z = oneMinusFrac.x * frac.y;
339    weights.w = frac.x * frac.y;
340
341    uint2 next;
342    next.x = wrapI(wrapS, iPixel.x + 1, sourceW);
343    next.y = wrapI(wrapT, iPixel.y + 1, sourceH);
344    uint2 location;
345    location.x = wrapI(wrapS, iPixel.x, sourceW);
346    location.y = wrapI(wrapT, iPixel.y, sourceH);
347
348    return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
349}
350
351static float4 __attribute__((overloadable))
352        sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
353                                uint32_t vecSize, rs_data_type dt,
354                                rs_sampler s,
355                                float2 uv, uint32_t lod) {
356    rs_sampler_value wrapS = rsSamplerGetWrapS(s);
357    rs_sampler_value wrapT = rsSamplerGetWrapT(s);
358
359    int32_t sourceW = type->mHal.state.lodDimX[lod];
360    int32_t sourceH = type->mHal.state.lodDimY[lod];
361
362    float2 dimF;
363    dimF.x = (float)(sourceW);
364    dimF.y = (float)(sourceH);
365    int2 iPixel = convert_int2(uv * dimF);
366
367    uint2 location;
368    location.x = wrapI(wrapS, iPixel.x, sourceW);
369    location.y = wrapI(wrapT, iPixel.y, sourceH);
370    return getNearestSample(a, location, vecSize, dt, lod);
371}
372
373extern const float4 __attribute__((overloadable))
374        rsSample(rs_allocation a, rs_sampler s, float location) {
375    return rsSample(a, s, location, 0);
376}
377
378extern const float4 __attribute__((overloadable))
379        rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
380    SAMPLE_FUNC_BODY()
381}
382
383extern const float4 __attribute__((overloadable))
384        rsSample(rs_allocation a, rs_sampler s, float2 location) {
385    return rsSample(a, s, location, 0.0f);
386}
387
388extern const float4 __attribute__((overloadable))
389        rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
390    SAMPLE_FUNC_BODY()
391}
392