rs_sample.c revision 3ff0fe77fdba8ad4a920dc27157d8c1786bb3661
1#include "rs_core.rsh"
2#include "rs_structs.h"
3
4
// RGB565 unpacking helpers; the bit layout is taken from SkBitmap.

// Number of bits each channel occupies in a packed 16-bit color.
#define SK_R16_BITS     5
#define SK_G16_BITS     6
#define SK_B16_BITS     5

// Bit offset of each channel: blue is lowest, green sits above blue,
// red sits above green.
#define SK_B16_SHIFT    0
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)

// All-ones masks, one per channel width.
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Extract a single raw channel (5 or 6 bits) from a packed color.
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)

// Widen a 5-bit red value to 8 bits by replicating its top bits into the
// low bits, so 0 maps to 0 and 31 maps to 255.
static inline unsigned SkR16ToR32(unsigned r) {
    const unsigned high = r << (8 - SK_R16_BITS);
    const unsigned low = r >> (2 * SK_R16_BITS - 8);
    return high | low;
}

// Widen a 6-bit green value to 8 bits (0 -> 0, 63 -> 255).
static inline unsigned SkG16ToG32(unsigned g) {
    const unsigned high = g << (8 - SK_G16_BITS);
    const unsigned low = g >> (2 * SK_G16_BITS - 8);
    return high | low;
}

// Widen a 5-bit blue value to 8 bits (0 -> 0, 31 -> 255).
static inline unsigned SkB16ToB32(unsigned b) {
    const unsigned high = b << (8 - SK_B16_BITS);
    const unsigned low = b >> (2 * SK_B16_BITS - 8);
    return high | low;
}

// Packed 16-bit color -> one 8-bit channel.
#define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
#define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
#define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
37
38static float3 getFrom565(uint16_t color) {
39    float3 result;
40    result.x = (float)SkPacked16ToR32(color);
41    result.y = (float)SkPacked16ToG32(color);
42    result.z = (float)SkPacked16ToB32(color);
43    return result;
44}
45
46/**
47* Allocation sampling
48*/
// Reads the single-byte element at index x and returns it as a float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
54
55static inline float2 __attribute__((overloadable))
56        getElementAt2(const uint8_t *p, int32_t x) {
57    x *= 2;
58    float2 r = {p[x], p[x+1]};
59    return r;
60}
61
62static inline float3 __attribute__((overloadable))
63        getElementAt3(const uint8_t *p, int32_t x) {
64    x *= 4;
65    float3 r = {p[x], p[x+1], p[x+2]};
66    return r;
67}
68
69static inline float4 __attribute__((overloadable))
70        getElementAt4(const uint8_t *p, int32_t x) {
71    x *= 4;
72    const uchar4 *p2 = (const uchar4 *)&p[x];
73    return convert_float4(p2[0]);
74}
75
76static inline float3 __attribute__((overloadable))
77        getElementAt565(const uint8_t *p, int32_t x) {
78    x *= 2;
79    float3 r = getFrom565(((const uint16_t *)p)[0]);
80    return r;
81}
82
// Reads the single-byte element at column x of row y, where rows are
// `stride` bytes apart, and returns it as a float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
89
90static inline float2 __attribute__((overloadable))
91        getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
92    p += y * stride;
93    x *= 2;
94    float2 r = {p[x], p[x+1]};
95    return r;
96}
97
98static inline float3 __attribute__((overloadable))
99        getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
100    p += y * stride;
101    x *= 4;
102    float3 r = {p[x], p[x+1], p[x+2]};
103    return r;
104}
105
106static inline float4 __attribute__((overloadable))
107        getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
108    p += y * stride;
109    x *= 4;
110    float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
111    return r;
112}
113
114static inline float3 __attribute__((overloadable))
115        getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
116    p += y * stride;
117    x *= 2;
118    float3 r = getFrom565(((const uint16_t *)p)[0]);
119    return r;
120}
121
122
123
124
125
126static float4 __attribute__((overloadable))
127            getSample_A(const uint8_t *p, int32_t iPixel,
128                          int32_t next, float w0, float w1) {
129    float p0 = getElementAt1(p, iPixel);
130    float p1 = getElementAt1(p, next);
131    float r = p0 * w0 + p1 * w1;
132    r *= (1.f / 255.f);
133    float4 ret = {0.f, 0.f, 0.f, r};
134    return ret;
135}
136static float4 __attribute__((overloadable))
137            getSample_L(const uint8_t *p, int32_t iPixel,
138                          int32_t next, float w0, float w1) {
139    float p0 = getElementAt1(p, iPixel);
140    float p1 = getElementAt1(p, next);
141    float r = p0 * w0 + p1 * w1;
142    r *= (1.f / 255.f);
143    float4 ret = {r, r, r, 1.f};
144    return ret;
145}
146static float4 __attribute__((overloadable))
147            getSample_LA(const uint8_t *p, int32_t iPixel,
148                           int32_t next, float w0, float w1) {
149    float2 p0 = getElementAt2(p, iPixel);
150    float2 p1 = getElementAt2(p, next);
151    float2 r = p0 * w0 + p1 * w1;
152    r *= (1.f / 255.f);
153    float4 ret = {r.x, r.x, r.x, r.y};
154    return ret;
155}
156static float4 __attribute__((overloadable))
157            getSample_RGB(const uint8_t *p, int32_t iPixel,
158                            int32_t next, float w0, float w1) {
159    float3 p0 = getElementAt3(p, iPixel);
160    float3 p1 = getElementAt3(p, next);
161    float3 r = p0 * w0 + p1 * w1;
162    r *= (1.f / 255.f);
163    float4 ret = {r.x, r.x, r.z, 1.f};
164    return ret;
165}
166static float4 __attribute__((overloadable))
167            getSample_565(const uint8_t *p, int32_t iPixel,
168                           int32_t next, float w0, float w1) {
169    float3 p0 = getElementAt565(p, iPixel);
170    float3 p1 = getElementAt565(p, next);
171    float3 r = p0 * w0 + p1 * w1;
172    r *= (1.f / 255.f);
173    float4 ret = {r.x, r.x, r.z, 1.f};
174    return ret;
175}
176static float4 __attribute__((overloadable))
177            getSample_RGBA(const uint8_t *p, int32_t iPixel,
178                             int32_t next, float w0, float w1) {
179    float4 p0 = getElementAt4(p, iPixel);
180    float4 p1 = getElementAt4(p, next);
181    float4 r = p0 * w0 + p1 * w1;
182    r *= (1.f / 255.f);
183    return r;
184}
185
186
187static float4 __attribute__((overloadable))
188            getSample_A(const uint8_t *p, size_t stride,
189                          int locX, int locY, int nextX, int nextY,
190                          float w0, float w1, float w2, float w3) {
191    float p0 = getElementAt1(p, stride, locX, locY);
192    float p1 = getElementAt1(p, stride, nextX, locY);
193    float p2 = getElementAt1(p, stride, locX, nextY);
194    float p3 = getElementAt1(p, stride, nextX, nextY);
195    float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
196    r *= (1.f / 255.f);
197    float4 ret = {0.f, 0.f, 0.f, r};
198    return ret;
199}
200static float4 __attribute__((overloadable))
201            getSample_L(const uint8_t *p, size_t stride,
202                         int locX, int locY, int nextX, int nextY,
203                         float w0, float w1, float w2, float w3) {
204    float p0 = getElementAt1(p, stride, locX, locY);
205    float p1 = getElementAt1(p, stride, nextX, locY);
206    float p2 = getElementAt1(p, stride, locX, nextY);
207    float p3 = getElementAt1(p, stride, nextX, nextY);
208    float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
209    r *= (1.f / 255.f);
210    float4 ret = {r, r, r, 1.f};
211    return ret;
212}
213static float4 __attribute__((overloadable))
214            getSample_LA(const uint8_t *p, size_t stride,
215                         int locX, int locY, int nextX, int nextY,
216                         float w0, float w1, float w2, float w3) {
217    float2 p0 = getElementAt2(p, stride, locX, locY);
218    float2 p1 = getElementAt2(p, stride, nextX, locY);
219    float2 p2 = getElementAt2(p, stride, locX, nextY);
220    float2 p3 = getElementAt2(p, stride, nextX, nextY);
221    float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
222    r *= (1.f / 255.f);
223    float4 ret = {r.x, r.x, r.x, r.y};
224    return ret;
225}
226static float4 __attribute__((overloadable))
227            getSample_RGB(const uint8_t *p, size_t stride,
228                         int locX, int locY, int nextX, int nextY,
229                         float w0, float w1, float w2, float w3) {
230    float4 p0 = getElementAt4(p, stride, locX, locY);
231    float4 p1 = getElementAt4(p, stride, nextX, locY);
232    float4 p2 = getElementAt4(p, stride, locX, nextY);
233    float4 p3 = getElementAt4(p, stride, nextX, nextY);
234    float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
235    r *= (1.f / 255.f);
236    float4 ret = {r.x, r.y, r.z, 1.f};
237    return ret;
238}
239static float4 __attribute__((overloadable))
240            getSample_RGBA(const uint8_t *p, size_t stride,
241                         int locX, int locY, int nextX, int nextY,
242                         float w0, float w1, float w2, float w3) {
243    float4 p0 = getElementAt4(p, stride, locX, locY);
244    float4 p1 = getElementAt4(p, stride, nextX, locY);
245    float4 p2 = getElementAt4(p, stride, locX, nextY);
246    float4 p3 = getElementAt4(p, stride, nextX, nextY);
247    float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
248    r *= (1.f / 255.f);
249    return r;
250}
251static float4 __attribute__((overloadable))
252            getSample_565(const uint8_t *p, size_t stride,
253                         int locX, int locY, int nextX, int nextY,
254                         float w0, float w1, float w2, float w3) {
255    float3 p0 = getElementAt565(p, stride, locX, locY);
256    float3 p1 = getElementAt565(p, stride, nextX, locY);
257    float3 p2 = getElementAt565(p, stride, locX, nextY);
258    float3 p3 = getElementAt565(p, stride, nextX, nextY);
259    float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
260    r *= (1.f / 255.f);
261    float4 ret;
262    ret.rgb = r;
263    ret.w = 1.f;
264    return ret;
265}
266
267static float4 __attribute__((overloadable))
268        getBilinearSample1D(const Allocation_t *alloc, float2 weights,
269                          uint32_t iPixel, uint32_t next,
270                          rs_data_kind dk, rs_data_type dt, uint32_t lod) {
271
272     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
273
274     switch(dk) {
275     case RS_KIND_PIXEL_RGBA:
276         return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
277     case RS_KIND_PIXEL_A:
278         return getSample_A(p, iPixel, next, weights.x, weights.y);
279     case RS_KIND_PIXEL_RGB:
280         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
281             return getSample_565(p, iPixel, next, weights.x, weights.y);
282         }
283         return getSample_RGB(p, iPixel, next, weights.x, weights.y);
284     case RS_KIND_PIXEL_L:
285         return getSample_L(p, iPixel, next, weights.x, weights.y);
286     case RS_KIND_PIXEL_LA:
287         return getSample_LA(p, iPixel, next, weights.x, weights.y);
288
289     default:
290         //__builtin_unreachable();
291         break;
292     }
293
294     //__builtin_unreachable();
295     return 0.f;
296}
297
298static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
299    if (wrap == RS_SAMPLER_WRAP) {
300        coord = coord % size;
301        if (coord < 0) {
302            coord += size;
303        }
304    }
305    if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
306        coord = coord % (size * 2);
307        if (coord < 0) {
308            coord = (size * 2) + coord;
309        }
310        if (coord >= size) {
311            coord = (size * 2) - coord;
312        }
313    }
314    return (uint32_t)max(0, min(coord, size - 1));
315}
316
317static float4 __attribute__((overloadable))
318        getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
319                          int lx, int ly, int nx, int ny,
320                          rs_data_kind dk, rs_data_type dt, uint32_t lod) {
321
322    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
323    size_t stride = alloc->mHal.drvState.lod[lod].stride;
324
325    switch(dk) {
326    case RS_KIND_PIXEL_RGBA:
327        return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
328    case RS_KIND_PIXEL_A:
329        return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
330    case RS_KIND_PIXEL_LA:
331        return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
332    case RS_KIND_PIXEL_RGB:
333        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
334            return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
335        }
336        return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
337    case RS_KIND_PIXEL_L:
338        return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
339
340    default:
341        break;
342    }
343
344    return 0.f;
345}
346
347static float4  __attribute__((overloadable))
348        getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
349                         rs_data_type dt, uint32_t lod) {
350
351    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
352
353    float4 result = {0.f, 0.f, 0.f, 255.f};
354
355    switch(dk) {
356    case RS_KIND_PIXEL_RGBA:
357        result = getElementAt4(p, iPixel);
358        break;
359    case RS_KIND_PIXEL_A:
360        result.w = getElementAt1(p, iPixel);
361        break;
362    case RS_KIND_PIXEL_LA:
363        result.zw = getElementAt2(p, iPixel);
364        result.xy = result.z;
365        break;
366    case RS_KIND_PIXEL_RGB:
367        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
368            result.xyz = getElementAt565(p, iPixel);
369        } else {
370            result.xyz = getElementAt3(p, iPixel);
371        }
372        break;
373    case RS_KIND_PIXEL_L:
374        result.xyz = getElementAt1(p, iPixel);
375
376    default:
377        //__builtin_unreachable();
378        break;
379    }
380
381    return result * 0.003921569f;
382}
383
384static float4  __attribute__((overloadable))
385        getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
386                         rs_data_type dt, uint32_t lod) {
387
388    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
389    size_t stride = alloc->mHal.drvState.lod[lod].stride;
390
391    float4 result = {0.f, 0.f, 0.f, 255.f};
392
393    switch(dk) {
394    case RS_KIND_PIXEL_RGBA:
395        result = getElementAt4(p, stride, iPixel.x, iPixel.y);
396        break;
397    case RS_KIND_PIXEL_A:
398        result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
399        break;
400    case RS_KIND_PIXEL_LA:
401        result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
402        result.xy = result.z;
403        break;
404    case RS_KIND_PIXEL_RGB:
405        if (dt == RS_TYPE_UNSIGNED_5_6_5) {
406            result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
407        } else {
408            result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
409        }
410        break;
411
412    default:
413        //__builtin_unreachable();
414        break;
415    }
416
417    return result * 0.003921569f;
418}
419
420static float4 __attribute__((overloadable))
421        sample_LOD_LinearPixel(const Allocation_t *alloc,
422                               rs_data_kind dk, rs_data_type dt,
423                               rs_sampler_value wrapS,
424                               float uv, uint32_t lod) {
425
426    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
427
428    int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
429    float pixelUV = uv * (float)(sourceW);
430    int32_t iPixel = (int32_t)(pixelUV);
431    float frac = pixelUV - (float)iPixel;
432
433    if (frac < 0.5f) {
434        iPixel -= 1;
435        frac += 0.5f;
436    } else {
437        frac -= 0.5f;
438    }
439
440    float oneMinusFrac = 1.0f - frac;
441
442    float2 weights;
443    weights.x = oneMinusFrac;
444    weights.y = frac;
445
446    uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
447    uint32_t location = wrapI(wrapS, iPixel, sourceW);
448
449    return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
450}
451
452static float4 __attribute__((overloadable))
453        sample_LOD_NearestPixel(const Allocation_t *alloc,
454                                rs_data_kind dk, rs_data_type dt,
455                                rs_sampler_value wrapS,
456                                float uv, uint32_t lod) {
457
458    int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
459    int32_t iPixel = (int32_t)(uv * (float)(sourceW));
460    uint32_t location = wrapI(wrapS, iPixel, sourceW);
461
462    return getNearestSample(alloc, location, dk, dt, lod);
463}
464
465static float4 __attribute__((overloadable))
466        sample_LOD_LinearPixel(const Allocation_t *alloc,
467                               rs_data_kind dk, rs_data_type dt,
468                               rs_sampler_value wrapS,
469                               rs_sampler_value wrapT,
470                               float2 uv, uint32_t lod) {
471
472    const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
473
474    int sourceW = alloc->mHal.drvState.lod[lod].dimX;
475    int sourceH = alloc->mHal.drvState.lod[lod].dimY;
476
477    float pixelU = uv.x * sourceW;
478    float pixelV = uv.y * sourceH;
479    int iPixelU = pixelU;
480    int iPixelV = pixelV;
481    float fracU = pixelU - iPixelU;
482    float fracV = pixelV - iPixelV;
483
484    if (fracU < 0.5f) {
485        iPixelU -= 1;
486        fracU += 0.5f;
487    } else {
488        fracU -= 0.5f;
489    }
490    if (fracV < 0.5f) {
491        iPixelV -= 1;
492        fracV += 0.5f;
493    } else {
494        fracV -= 0.5f;
495    }
496    float oneMinusFracU = 1.0f - fracU;
497    float oneMinusFracV = 1.0f - fracV;
498
499    float w0 = oneMinusFracU * oneMinusFracV;
500    float w1 = fracU * oneMinusFracV;
501    float w2 = oneMinusFracU * fracV;
502    float w3 = fracU * fracV;
503
504    int nx = wrapI(wrapS, iPixelU + 1, sourceW);
505    int ny = wrapI(wrapT, iPixelV + 1, sourceH);
506    int lx = wrapI(wrapS, iPixelU, sourceW);
507    int ly = wrapI(wrapT, iPixelV, sourceH);
508
509    return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
510
511}
512
513static float4 __attribute__((overloadable))
514        sample_LOD_NearestPixel(const Allocation_t *alloc,
515                                rs_data_kind dk, rs_data_type dt,
516                                rs_sampler_value wrapS,
517                                rs_sampler_value wrapT,
518                                float2 uv, uint32_t lod) {
519    int sourceW = alloc->mHal.drvState.lod[lod].dimX;
520    int sourceH = alloc->mHal.drvState.lod[lod].dimY;
521
522    float2 dimF;
523    dimF.x = (float)(sourceW);
524    dimF.y = (float)(sourceH);
525    int2 iPixel = convert_int2(uv * dimF);
526
527    uint2 location;
528    location.x = wrapI(wrapS, iPixel.x, sourceW);
529    location.y = wrapI(wrapT, iPixel.y, sourceH);
530    return getNearestSample(alloc, location, dk, dt, lod);
531}
532
533extern const float4 __attribute__((overloadable))
534        rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
535
536    const Allocation_t *alloc = (const Allocation_t *)a.p;
537    const Sampler_t *prog = (Sampler_t *)s.p;
538    const Type_t *type = (Type_t *)alloc->mHal.state.type;
539    const Element_t *elem = type->mHal.state.element;
540    rs_data_kind dk = elem->mHal.state.dataKind;
541    rs_data_type dt = elem->mHal.state.dataType;
542    rs_sampler_value sampleMin = prog->mHal.state.minFilter;
543    rs_sampler_value sampleMag = prog->mHal.state.magFilter;
544    rs_sampler_value wrapS = prog->mHal.state.wrapS;
545
546    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
547        return 0.f;
548    }
549
550    if (lod <= 0.0f) {
551        if (sampleMag == RS_SAMPLER_NEAREST) {
552            return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
553        }
554        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
555    }
556
557    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
558        uint32_t maxLOD = type->mHal.state.lodCount - 1;
559        lod = min(lod, (float)maxLOD);
560        uint32_t nearestLOD = (uint32_t)round(lod);
561        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
562    }
563
564    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
565        uint32_t lod0 = (uint32_t)floor(lod);
566        uint32_t lod1 = (uint32_t)ceil(lod);
567        uint32_t maxLOD = type->mHal.state.lodCount - 1;
568        lod0 = min(lod0, maxLOD);
569        lod1 = min(lod1, maxLOD);
570        float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
571        float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
572        float frac = lod - (float)lod0;
573        return sample0 * (1.0f - frac) + sample1 * frac;
574    }
575
576    return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
577}
578
579extern const float4 __attribute__((overloadable))
580        rsSample(rs_allocation a, rs_sampler s, float location) {
581    return rsSample(a, s, location, 0);
582}
583
584
585extern const float4 __attribute__((overloadable))
586        rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
587
588    const Allocation_t *alloc = (const Allocation_t *)a.p;
589    const Sampler_t *prog = (Sampler_t *)s.p;
590    const Type_t *type = (Type_t *)alloc->mHal.state.type;
591    const Element_t *elem = type->mHal.state.element;
592    rs_data_kind dk = elem->mHal.state.dataKind;
593    rs_data_type dt = elem->mHal.state.dataType;
594    rs_sampler_value sampleMin = prog->mHal.state.minFilter;
595    rs_sampler_value sampleMag = prog->mHal.state.magFilter;
596    rs_sampler_value wrapS = prog->mHal.state.wrapS;
597    rs_sampler_value wrapT = prog->mHal.state.wrapT;
598
599    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
600        return 0.f;
601    }
602
603    if (lod <= 0.0f) {
604        if (sampleMag == RS_SAMPLER_NEAREST) {
605            return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
606        }
607        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
608    }
609
610    if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
611        uint32_t maxLOD = type->mHal.state.lodCount - 1;
612        lod = min(lod, (float)maxLOD);
613        uint32_t nearestLOD = (uint32_t)round(lod);
614        return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
615    }
616
617    if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
618        uint32_t lod0 = (uint32_t)floor(lod);
619        uint32_t lod1 = (uint32_t)ceil(lod);
620        uint32_t maxLOD = type->mHal.state.lodCount - 1;
621        lod0 = min(lod0, maxLOD);
622        lod1 = min(lod1, maxLOD);
623        float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
624        float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
625        float frac = lod - (float)lod0;
626        return sample0 * (1.0f - frac) + sample1 * frac;
627    }
628
629    return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
630}
631
632extern const float4 __attribute__((overloadable))
633        rsSample(rs_allocation a, rs_sampler s, float2 uv) {
634
635    const Allocation_t *alloc = (const Allocation_t *)a.p;
636    const Sampler_t *prog = (Sampler_t *)s.p;
637    const Type_t *type = (Type_t *)alloc->mHal.state.type;
638    const Element_t *elem = type->mHal.state.element;
639    rs_data_kind dk = elem->mHal.state.dataKind;
640    rs_data_type dt = elem->mHal.state.dataType;
641    rs_sampler_value wrapS = prog->mHal.state.wrapS;
642    rs_sampler_value wrapT = prog->mHal.state.wrapT;
643
644    if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
645        return 0.f;
646    }
647
648    if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
649        return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
650    }
651    return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
652}
653
654