1#include "rs_core.rsh"
2#include "rs_structs.h"
3
4
// RGB565 unpacking helpers; the bit layout is taken from SkBitmap.
// Red sits in the top 5 bits, green in the middle 6, blue in the low 5.

// Number of bits stored for each channel.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit position of each channel's least-significant bit in the packed pixel.
#define SK_B16_SHIFT    0
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)

// All-ones masks, one per channel, as wide as that channel.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract one raw (unscaled) channel from a packed 565 pixel.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Widens a 5-bit red value to 8 bits. The value is shifted up and its top
// bits are copied into the vacated low bits, so 0 -> 0 and 31 -> 255.
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned shifted = r << (8 - SK_R16_BITS);
    const unsigned replicated = r >> (2 * SK_R16_BITS - 8);
    return shifted | replicated;
}

// Widens a 6-bit green value to 8 bits using the same bit replication.
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned shifted = g << (8 - SK_G16_BITS);
    const unsigned replicated = g >> (2 * SK_G16_BITS - 8);
    return shifted | replicated;
}

// Widens a 5-bit blue value to 8 bits using the same bit replication.
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned shifted = b << (8 - SK_B16_BITS);
    const unsigned replicated = b >> (2 * SK_B16_BITS - 8);
    return shifted | replicated;
}

// Extract a channel from a packed 565 pixel and widen it to 8 bits.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
37
38static float3 getFrom565(uint16_t color) {
39    float3 result;
40    result.x = (float)SkPacked16ToR32(color);
41    result.y = (float)SkPacked16ToG32(color);
42    result.z = (float)SkPacked16ToB32(color);
43    return result;
44}
45
46/**
47* Allocation sampling
48*/
// Reads the single-byte element at index x and returns it as a float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54
55static inline float2 __attribute__((overloadable))
56        getElementAt2(const uint8_t *p, int32_t x) {
57    x *= 2;
58    float2 r = {p[x], p[x+1]};
59    return r;
60}
61
62static inline float3 __attribute__((overloadable))
63        getElementAt3(const uint8_t *p, int32_t x) {
64    x *= 4;
65    float3 r = {p[x], p[x+1], p[x+2]};
66    return r;
67}
68
69static inline float4 __attribute__((overloadable))
70        getElementAt4(const uint8_t *p, int32_t x) {
71    x *= 4;
72    const uchar4 *p2 = (const uchar4 *)&p[x];
73    return convert_float4(p2[0]);
74}
75
76static inline float3 __attribute__((overloadable))
77        getElementAt565(const uint8_t *p, int32_t x) {
78    x *= 2;
79    float3 r = getFrom565(((const uint16_t *)p)[0]);
80    return r;
81}
82
// Reads the single-byte element at column x of row y, where rows are
// `stride` bytes apart, and returns it as a float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89
90static inline float2 __attribute__((overloadable))
91        getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92    p += y * stride;
93    x *= 2;
94    float2 r = {p[x], p[x+1]};
95    return r;
96}
97
98static inline float3 __attribute__((overloadable))
99        getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100    p += y * stride;
101    x *= 4;
102    float3 r = {p[x], p[x+1], p[x+2]};
103    return r;
104}
105
106static inline float4 __attribute__((overloadable))
107        getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108    p += y * stride;
109    x *= 4;
110    float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111    return r;
112}
113
114static inline float3 __attribute__((overloadable))
115        getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116    p += y * stride;
117    x *= 2;
118    float3 r = getFrom565(((const uint16_t *)p)[0]);
119    return r;
120}
121
122
123
124
125
126static float4 __attribute__((overloadable))
127            getSample_A(const uint8_t *p, int32_t iPixel,
128                          int32_t next, float w0, float w1) {
129    float p0 = getElementAt1(p, iPixel);
130    float p1 = getElementAt1(p, next);
131    float r = p0 * w0 + p1 * w1;
132    r *= (1.f / 255.f);
133    float4 ret = {0.f, 0.f, 0.f, r};
134    return ret;
135}
136static float4 __attribute__((overloadable))
137            getSample_L(const uint8_t *p, int32_t iPixel,
138                          int32_t next, float w0, float w1) {
139    float p0 = getElementAt1(p, iPixel);
140    float p1 = getElementAt1(p, next);
141    float r = p0 * w0 + p1 * w1;
142    r *= (1.f / 255.f);
143    float4 ret = {r, r, r, 1.f};
144    return ret;
145}
146static float4 __attribute__((overloadable))
147            getSample_LA(const uint8_t *p, int32_t iPixel,
148                           int32_t next, float w0, float w1) {
149    float2 p0 = getElementAt2(p, iPixel);
150    float2 p1 = getElementAt2(p, next);
151    float2 r = p0 * w0 + p1 * w1;
152    r *= (1.f / 255.f);
153    float4 ret = {r.x, r.x, r.x, r.y};
154    return ret;
155}
156static float4 __attribute__((overloadable))
157            getSample_RGB(const uint8_t *p, int32_t iPixel,
158                            int32_t next, float w0, float w1) {
159    float3 p0 = getElementAt3(p, iPixel);
160    float3 p1 = getElementAt3(p, next);
161    float3 r = p0 * w0 + p1 * w1;
162    r *= (1.f / 255.f);
163    float4 ret = {r.x, r.x, r.z, 1.f};
164    return ret;
165}
166static float4 __attribute__((overloadable))
167            getSample_565(const uint8_t *p, int32_t iPixel,
168                           int32_t next, float w0, float w1) {
169    float3 p0 = getElementAt565(p, iPixel);
170    float3 p1 = getElementAt565(p, next);
171    float3 r = p0 * w0 + p1 * w1;
172    r *= (1.f / 255.f);
173    float4 ret = {r.x, r.x, r.z, 1.f};
174    return ret;
175}
176static float4 __attribute__((overloadable))
177            getSample_RGBA(const uint8_t *p, int32_t iPixel,
178                             int32_t next, float w0, float w1) {
179    float4 p0 = getElementAt4(p, iPixel);
180    float4 p1 = getElementAt4(p, next);
181    float4 r = p0 * w0 + p1 * w1;
182    r *= (1.f / 255.f);
183    return r;
184}
185
186
187static float4 __attribute__((overloadable))
188            getSample_A(const uint8_t *p, size_t stride,
189                          int locX, int locY, int nextX, int nextY,
190                          float w0, float w1, float w2, float w3) {
191    float p0 = getElementAt1(p, stride, locX, locY);
192    float p1 = getElementAt1(p, stride, nextX, locY);
193    float p2 = getElementAt1(p, stride, locX, nextY);
194    float p3 = getElementAt1(p, stride, nextX, nextY);
195    float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196    r *= (1.f / 255.f);
197    float4 ret = {0.f, 0.f, 0.f, r};
198    return ret;
199}
200static float4 __attribute__((overloadable))
201            getSample_L(const uint8_t *p, size_t stride,
202                         int locX, int locY, int nextX, int nextY,
203                         float w0, float w1, float w2, float w3) {
204    float p0 = getElementAt1(p, stride, locX, locY);
205    float p1 = getElementAt1(p, stride, nextX, locY);
206    float p2 = getElementAt1(p, stride, locX, nextY);
207    float p3 = getElementAt1(p, stride, nextX, nextY);
208    float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209    r *= (1.f / 255.f);
210    float4 ret = {r, r, r, 1.f};
211    return ret;
212}
213static float4 __attribute__((overloadable))
214            getSample_LA(const uint8_t *p, size_t stride,
215                         int locX, int locY, int nextX, int nextY,
216                         float w0, float w1, float w2, float w3) {
217    float2 p0 = getElementAt2(p, stride, locX, locY);
218    float2 p1 = getElementAt2(p, stride, nextX, locY);
219    float2 p2 = getElementAt2(p, stride, locX, nextY);
220    float2 p3 = getElementAt2(p, stride, nextX, nextY);
221    float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222    r *= (1.f / 255.f);
223    float4 ret = {r.x, r.x, r.x, r.y};
224    return ret;
225}
226static float4 __attribute__((overloadable))
227            getSample_RGB(const uint8_t *p, size_t stride,
228                         int locX, int locY, int nextX, int nextY,
229                         float w0, float w1, float w2, float w3) {
230    float4 p0 = getElementAt4(p, stride, locX, locY);
231    float4 p1 = getElementAt4(p, stride, nextX, locY);
232    float4 p2 = getElementAt4(p, stride, locX, nextY);
233    float4 p3 = getElementAt4(p, stride, nextX, nextY);
234    float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235    r *= (1.f / 255.f);
236    float4 ret = {r.x, r.y, r.z, 1.f};
237    return ret;
238}
239static float4 __attribute__((overloadable))
240            getSample_RGBA(const uint8_t *p, size_t stride,
241                         int locX, int locY, int nextX, int nextY,
242                         float w0, float w1, float w2, float w3) {
243    float4 p0 = getElementAt4(p, stride, locX, locY);
244    float4 p1 = getElementAt4(p, stride, nextX, locY);
245    float4 p2 = getElementAt4(p, stride, locX, nextY);
246    float4 p3 = getElementAt4(p, stride, nextX, nextY);
247    float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248    r *= (1.f / 255.f);
249    return r;
250}
251static float4 __attribute__((overloadable))
252            getSample_565(const uint8_t *p, size_t stride,
253                         int locX, int locY, int nextX, int nextY,
254                         float w0, float w1, float w2, float w3) {
255    float3 p0 = getElementAt565(p, stride, locX, locY);
256    float3 p1 = getElementAt565(p, stride, nextX, locY);
257    float3 p2 = getElementAt565(p, stride, locX, nextY);
258    float3 p3 = getElementAt565(p, stride, nextX, nextY);
259    float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260    r *= (1.f / 255.f);
261    float4 ret;
262    ret.rgb = r;
263    ret.w = 1.f;
264    return ret;
265}
266
267static float4 __attribute__((overloadable))
268        getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269                          uint32_t iPixel, uint32_t next,
270                          rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271
272     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273
274     switch(dk) {
275     case RS_KIND_PIXEL_RGBA:
276         return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277     case RS_KIND_PIXEL_A:
278         return getSample_A(p, iPixel, next, weights.x, weights.y);
279     case RS_KIND_PIXEL_RGB:
280         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281             return getSample_565(p, iPixel, next, weights.x, weights.y);
282         }
283         return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284     case RS_KIND_PIXEL_L:
285         return getSample_L(p, iPixel, next, weights.x, weights.y);
286     case RS_KIND_PIXEL_LA:
287         return getSample_LA(p, iPixel, next, weights.x, weights.y);
288
289     default:
290         //__builtin_unreachable();
291         break;
292     }
293
294     //__builtin_unreachable();
295     return 0.f;
296}
297
298static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
299    if (wrap == RS_SAMPLER_WRAP) {
300        coord = coord % size;
301        if (coord < 0) {
302            coord += size;
303        }
304    }
305    if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
306        coord = coord % (size * 2);
307        if (coord < 0) {
308            coord = (size * 2) + coord;
309        }
310        if (coord >= size) {
311            coord = (size * 2 - 1) - coord;
312        }
313    }
314    return (uint32_t)max(0, min(coord, size - 1));
315}
316
317static float4 __attribute__((overloadable))
318        getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319                          int lx, int ly, int nx, int ny,
320                          rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321
322    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323    size_t stride = alloc->mHal.drvState.lod[lod].stride;
324
325    switch(dk) {
326    case RS_KIND_PIXEL_RGBA:
327        return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328    case RS_KIND_PIXEL_A:
329        return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330    case RS_KIND_PIXEL_LA:
331        return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332    case RS_KIND_PIXEL_RGB:
333        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334            return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335        }
336        return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337    case RS_KIND_PIXEL_L:
338        return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339
340    default:
341        break;
342    }
343
344    return 0.f;
345}
346
347static float4  __attribute__((overloadable))
348        getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349                         rs_data_type dt, uint32_t lod) {
350
351    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352
353    float4 result = {0.f, 0.f, 0.f, 255.f};
354
355    switch(dk) {
356    case RS_KIND_PIXEL_RGBA:
357        result = getElementAt4(p, iPixel);
358        break;
359    case RS_KIND_PIXEL_A:
360        result.w = getElementAt1(p, iPixel);
361        break;
362    case RS_KIND_PIXEL_LA:
363        result.zw = getElementAt2(p, iPixel);
364        result.xy = result.z;
365        break;
366    case RS_KIND_PIXEL_RGB:
367        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368            result.xyz = getElementAt565(p, iPixel);
369        } else {
370            result.xyz = getElementAt3(p, iPixel);
371        }
372        break;
373    case RS_KIND_PIXEL_L:
374        result.xyz = getElementAt1(p, iPixel);
375
376    default:
377        //__builtin_unreachable();
378        break;
379    }
380
381    return result * 0.003921569f;
382}
383
384static float4  __attribute__((overloadable))
385        getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386                         rs_data_type dt, uint32_t lod) {
387
388    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389    size_t stride = alloc->mHal.drvState.lod[lod].stride;
390
391    float4 result = {0.f, 0.f, 0.f, 255.f};
392
393    switch(dk) {
394    case RS_KIND_PIXEL_RGBA:
395        result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396        break;
397    case RS_KIND_PIXEL_A:
398        result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399        break;
400    case RS_KIND_PIXEL_LA:
401        result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402        result.xy = result.z;
403        break;
404    case RS_KIND_PIXEL_RGB:
405        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406            result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407        } else {
408            result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409        }
410        break;
411
412    default:
413        //__builtin_unreachable();
414        break;
415    }
416
417    return result * 0.003921569f;
418}
419
420static float4 __attribute__((overloadable))
421        sample_LOD_LinearPixel(const Allocation_t *alloc,
422                               rs_data_kind dk, rs_data_type dt,
423                               rs_sampler_value wrapS,
424                               float uv, uint32_t lod) {
425
426    int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
427    float pixelUV = uv * (float)(sourceW);
428    int32_t iPixel = floor(pixelUV);
429    float frac = pixelUV - (float)iPixel;
430
431    if (frac < 0.5f) {
432        iPixel -= 1;
433        frac += 0.5f;
434    } else {
435        frac -= 0.5f;
436    }
437
438    float oneMinusFrac = 1.0f - frac;
439
440    float2 weights;
441    weights.x = oneMinusFrac;
442    weights.y = frac;
443
444    uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
445    uint32_t location = wrapI(wrapS, iPixel, sourceW);
446
447    return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
448}
449
450static float4 __attribute__((overloadable))
451        sample_LOD_NearestPixel(const Allocation_t *alloc,
452                                rs_data_kind dk, rs_data_type dt,
453                                rs_sampler_value wrapS,
454                                float uv, uint32_t lod) {
455
456    int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
457    int32_t iPixel = floor(uv * (float)(sourceW));
458    uint32_t location = wrapI(wrapS, iPixel, sourceW);
459
460    return getNearestSample(alloc, location, dk, dt, lod);
461}
462
463static float4 __attribute__((overloadable))
464        sample_LOD_LinearPixel(const Allocation_t *alloc,
465                               rs_data_kind dk, rs_data_type dt,
466                               rs_sampler_value wrapS,
467                               rs_sampler_value wrapT,
468                               float2 uv, uint32_t lod) {
469
470    int sourceW = alloc->mHal.drvState.lod[lod].dimX;
471    int sourceH = alloc->mHal.drvState.lod[lod].dimY;
472
473    float pixelU = uv.x * sourceW;
474    float pixelV = uv.y * sourceH;
475    int iPixelU = floor(pixelU);
476    int iPixelV = floor(pixelV);
477    float fracU = pixelU - iPixelU;
478    float fracV = pixelV - iPixelV;
479
480    if (fracU < 0.5f) {
481        iPixelU -= 1;
482        fracU += 0.5f;
483    } else {
484        fracU -= 0.5f;
485    }
486    if (fracV < 0.5f) {
487        iPixelV -= 1;
488        fracV += 0.5f;
489    } else {
490        fracV -= 0.5f;
491    }
492    float oneMinusFracU = 1.0f - fracU;
493    float oneMinusFracV = 1.0f - fracV;
494
495    float w0 = oneMinusFracU * oneMinusFracV;
496    float w1 = fracU * oneMinusFracV;
497    float w2 = oneMinusFracU * fracV;
498    float w3 = fracU * fracV;
499
500    int nx = wrapI(wrapS, iPixelU + 1, sourceW);
501    int ny = wrapI(wrapT, iPixelV + 1, sourceH);
502    int lx = wrapI(wrapS, iPixelU, sourceW);
503    int ly = wrapI(wrapT, iPixelV, sourceH);
504
505    return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
506
507}
508
509static float4 __attribute__((overloadable))
510        sample_LOD_NearestPixel(const Allocation_t *alloc,
511                                rs_data_kind dk, rs_data_type dt,
512                                rs_sampler_value wrapS,
513                                rs_sampler_value wrapT,
514                                float2 uv, uint32_t lod) {
515    int sourceW = alloc->mHal.drvState.lod[lod].dimX;
516    int sourceH = alloc->mHal.drvState.lod[lod].dimY;
517
518    float2 dimF;
519    dimF.x = (float)(sourceW);
520    dimF.y = (float)(sourceH);
521    int2 iPixel = convert_int2(floor(uv * dimF));
522
523    uint2 location;
524    location.x = wrapI(wrapS, iPixel.x, sourceW);
525    location.y = wrapI(wrapT, iPixel.y, sourceH);
526    return getNearestSample(alloc, location, dk, dt, lod);
527}
528
529extern float4 __attribute__((overloadable))
530        rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
531
532    const Allocation_t *alloc = (const Allocation_t *)a.p;
533    const Sampler_t *prog = (Sampler_t *)s.p;
534    const Type_t *type = (Type_t *)alloc->mHal.state.type;
535    const Element_t *elem = type->mHal.state.element;
536    rs_data_kind dk = elem->mHal.state.dataKind;
537    rs_data_type dt = elem->mHal.state.dataType;
538    rs_sampler_value sampleMin = prog->mHal.state.minFilter;
539    rs_sampler_value sampleMag = prog->mHal.state.magFilter;
540    rs_sampler_value wrapS = prog->mHal.state.wrapS;
541
542    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
543        return 0.f;
544    }
545
546    if (lod <= 0.0f) {
547        if (sampleMag == RS_SAMPLER_NEAREST) {
548            return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
549        }
550        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
551    }
552
553    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
554        uint32_t maxLOD = type->mHal.state.lodCount - 1;
555        lod = min(lod, (float)maxLOD);
556        uint32_t nearestLOD = (uint32_t)round(lod);
557        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
558    }
559
560    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
561        uint32_t lod0 = (uint32_t)floor(lod);
562        uint32_t lod1 = (uint32_t)ceil(lod);
563        uint32_t maxLOD = type->mHal.state.lodCount - 1;
564        lod0 = min(lod0, maxLOD);
565        lod1 = min(lod1, maxLOD);
566        float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
567        float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
568        float frac = lod - (float)lod0;
569        return sample0 * (1.0f - frac) + sample1 * frac;
570    }
571
572    return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
573}
574
575extern float4 __attribute__((overloadable))
576        rsSample(rs_allocation a, rs_sampler s, float location) {
577    return rsSample(a, s, location, 0);
578}
579
580
581extern float4 __attribute__((overloadable))
582        rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
583
584    const Allocation_t *alloc = (const Allocation_t *)a.p;
585    const Sampler_t *prog = (Sampler_t *)s.p;
586    const Type_t *type = (Type_t *)alloc->mHal.state.type;
587    const Element_t *elem = type->mHal.state.element;
588    rs_data_kind dk = elem->mHal.state.dataKind;
589    rs_data_type dt = elem->mHal.state.dataType;
590    rs_sampler_value sampleMin = prog->mHal.state.minFilter;
591    rs_sampler_value sampleMag = prog->mHal.state.magFilter;
592    rs_sampler_value wrapS = prog->mHal.state.wrapS;
593    rs_sampler_value wrapT = prog->mHal.state.wrapT;
594
595    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
596        return 0.f;
597    }
598
599    if (lod <= 0.0f) {
600        if (sampleMag == RS_SAMPLER_NEAREST) {
601            return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
602        }
603        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
604    }
605
606    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
607        uint32_t maxLOD = type->mHal.state.lodCount - 1;
608        lod = min(lod, (float)maxLOD);
609        uint32_t nearestLOD = (uint32_t)round(lod);
610        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
611    }
612
613    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
614        uint32_t lod0 = (uint32_t)floor(lod);
615        uint32_t lod1 = (uint32_t)ceil(lod);
616        uint32_t maxLOD = type->mHal.state.lodCount - 1;
617        lod0 = min(lod0, maxLOD);
618        lod1 = min(lod1, maxLOD);
619        float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
620        float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
621        float frac = lod - (float)lod0;
622        return sample0 * (1.0f - frac) + sample1 * frac;
623    }
624
625    return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
626}
627
628extern float4 __attribute__((overloadable))
629        rsSample(rs_allocation a, rs_sampler s, float2 uv) {
630
631    const Allocation_t *alloc = (const Allocation_t *)a.p;
632    const Sampler_t *prog = (Sampler_t *)s.p;
633    const Type_t *type = (Type_t *)alloc->mHal.state.type;
634    const Element_t *elem = type->mHal.state.element;
635    rs_data_kind dk = elem->mHal.state.dataKind;
636    rs_data_type dt = elem->mHal.state.dataType;
637    rs_sampler_value wrapS = prog->mHal.state.wrapS;
638    rs_sampler_value wrapT = prog->mHal.state.wrapT;
639
640    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
641        return 0.f;
642    }
643
644    if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
645        return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
646    }
647    return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
648}
649