1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21namespace android {
22namespace renderscript {
23
24
25class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
26public:
27    void populateScript(Script *) override;
28    void invokeFreeChildren() override;
29
30    void setGlobalObj(uint32_t slot, ObjectBase *data) override;
31
32    ~RsdCpuScriptIntrinsicResize() override;
33    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
34
35    void preLaunch(uint32_t slot, const Allocation ** ains,
36                   uint32_t inLen, Allocation * aout, const void * usr,
37                   uint32_t usrLen, const RsScriptCall *sc) override;
38
39    float scaleX;
40    float scaleY;
41
42protected:
43    ObjectBaseRef<const Allocation> mAlloc;
44    ObjectBaseRef<const Element> mElement;
45
46    static void kernelU1(const RsExpandKernelDriverInfo *info,
47                         uint32_t xstart, uint32_t xend,
48                         uint32_t outstep);
49    static void kernelU2(const RsExpandKernelDriverInfo *info,
50                         uint32_t xstart, uint32_t xend,
51                         uint32_t outstep);
52    static void kernelU4(const RsExpandKernelDriverInfo *info,
53                         uint32_t xstart, uint32_t xend,
54                         uint32_t outstep);
55    static void kernelF1(const RsExpandKernelDriverInfo *info,
56                         uint32_t xstart, uint32_t xend,
57                         uint32_t outstep);
58    static void kernelF2(const RsExpandKernelDriverInfo *info,
59                         uint32_t xstart, uint32_t xend,
60                         uint32_t outstep);
61    static void kernelF4(const RsExpandKernelDriverInfo *info,
62                         uint32_t xstart, uint32_t xend,
63                         uint32_t outstep);
64};
65
66void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
67    rsAssert(slot == 0);
68    mAlloc.set(static_cast<Allocation *>(data));
69}
70
71static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
72    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
73            + x * (3.f * (p1 - p2) + p3 - p0)));
74}
75
76static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
77    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
78            + x * (3.f * (p1 - p2) + p3 - p0)));
79}
80
/**
 * Cubic interpolation between p1 and p2, using p0 and p3 as the outer
 * support samples. Returns p1 for x == 0 and p2 for x == 1.
 * The polynomial is evaluated in Horner form.
 */
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    const float linear = p2 - p0;
    const float quadratic = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3;
    const float cubic = 3.f * (p1 - p2) + p3 - p0;
    return p1 + 0.5f * x * (linear + x * (quadratic + x * cubic));
}
85
86static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
87                         float xf, float yf, int width) {
88    int startx = (int) floor(xf - 1);
89    xf = xf - floor(xf);
90    int maxx = width - 1;
91    int xs0 = rsMax(0, startx + 0);
92    int xs1 = rsMax(0, startx + 1);
93    int xs2 = rsMin(maxx, startx + 2);
94    int xs3 = rsMin(maxx, startx + 3);
95
96    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
97                                  convert_float4(yp0[xs1]),
98                                  convert_float4(yp0[xs2]),
99                                  convert_float4(yp0[xs3]), xf);
100
101    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
102                                  convert_float4(yp1[xs1]),
103                                  convert_float4(yp1[xs2]),
104                                  convert_float4(yp1[xs3]), xf);
105
106    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
107                                  convert_float4(yp2[xs1]),
108                                  convert_float4(yp2[xs2]),
109                                  convert_float4(yp2[xs3]), xf);
110
111    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
112                                  convert_float4(yp3[xs1]),
113                                  convert_float4(yp3[xs2]),
114                                  convert_float4(yp3[xs3]), xf);
115
116    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
117    p = clamp(p + 0.5f, 0.f, 255.f);
118    return convert_uchar4(p);
119}
120
121static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
122                         float xf, float yf, int width) {
123    int startx = (int) floor(xf - 1);
124    xf = xf - floor(xf);
125    int maxx = width - 1;
126    int xs0 = rsMax(0, startx + 0);
127    int xs1 = rsMax(0, startx + 1);
128    int xs2 = rsMin(maxx, startx + 2);
129    int xs3 = rsMin(maxx, startx + 3);
130
131    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
132                                  convert_float2(yp0[xs1]),
133                                  convert_float2(yp0[xs2]),
134                                  convert_float2(yp0[xs3]), xf);
135
136    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
137                                  convert_float2(yp1[xs1]),
138                                  convert_float2(yp1[xs2]),
139                                  convert_float2(yp1[xs3]), xf);
140
141    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
142                                  convert_float2(yp2[xs1]),
143                                  convert_float2(yp2[xs2]),
144                                  convert_float2(yp2[xs3]), xf);
145
146    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
147                                  convert_float2(yp3[xs1]),
148                                  convert_float2(yp3[xs2]),
149                                  convert_float2(yp3[xs3]), xf);
150
151    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
152    p = clamp(p + 0.5f, 0.f, 255.f);
153    return convert_uchar2(p);
154}
155
156static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
157                        float xf, float yf, int width) {
158    int startx = (int) floor(xf - 1);
159    xf = xf - floor(xf);
160    int maxx = width - 1;
161    int xs0 = rsMax(0, startx + 0);
162    int xs1 = rsMax(0, startx + 1);
163    int xs2 = rsMin(maxx, startx + 2);
164    int xs3 = rsMin(maxx, startx + 3);
165
166    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
167                                 (float)yp0[xs2], (float)yp0[xs3], xf);
168    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
169                                 (float)yp1[xs2], (float)yp1[xs3], xf);
170    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
171                                 (float)yp2[xs2], (float)yp2[xs3], xf);
172    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
173                                 (float)yp3[xs2], (float)yp3[xs3], xf);
174
175    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
176    p = clamp(p + 0.5f, 0.f, 255.f);
177    return (uchar)p;
178}
179
180extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
181
182extern "C" void rsdIntrinsicResizeB4_K(
183            uchar4 *dst,
184            size_t count,
185            uint32_t xf,
186            uint32_t xinc,
187            uchar4 const *srcn,
188            uchar4 const *src0,
189            uchar4 const *src1,
190            uchar4 const *src2,
191            size_t xclip,
192            size_t avail,
193            uint64_t osc_ctl,
194            int32_t const *yr);
195
196extern "C" void rsdIntrinsicResizeB2_K(
197            uchar2 *dst,
198            size_t count,
199            uint32_t xf,
200            uint32_t xinc,
201            uchar2 const *srcn,
202            uchar2 const *src0,
203            uchar2 const *src1,
204            uchar2 const *src2,
205            size_t xclip,
206            size_t avail,
207            uint64_t osc_ctl,
208            int32_t const *yr);
209
210extern "C" void rsdIntrinsicResizeB1_K(
211            uchar *dst,
212            size_t count,
213            uint32_t xf,
214            uint32_t xinc,
215            uchar const *srcn,
216            uchar const *src0,
217            uchar const *src1,
218            uchar const *src2,
219            size_t xclip,
220            size_t avail,
221            uint64_t osc_ctl,
222            int32_t const *yr);
223
224#if defined(ARCH_ARM_USE_INTRINSICS)
/**
 * Computes the four vertical cubic weights for fraction yf as 16.16
 * fixed-point integers and stores them in yr[0..3].
 *
 * yr[0] and yr[3] are stored negated relative to the corresponding terms of
 * cubicInterpolate(), so for yf in [0, 1) all four values come out
 * non-negative up to rounding.
 */
static void mkYCoeff(int32_t *yr, float yf) {
    // yf, yf^2 and yf^3 scaled by 2^16 and rounded to integers.
    const int32_t t1 = rint(yf * 0x10000);
    const int32_t t2 = rint(yf * yf * 0x10000);
    const int32_t t3 = rint(yf * yf * yf * 0x10000);

    const int32_t weights[4] = {
        -(2 * t2 - t3 - t1) >> 1,
        (3 * t3 - 5 * t2 + 0x20000) >> 1,
        (-3 * t3 + 4 * t2 + t1) >> 1,
        -(t3 - t2) >> 1,
    };

    for (int i = 0; i < 4; i++) {
        yr[i] = weights[i];
    }
}
235#endif
236
237static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
238                         float xf, float yf, int width) {
239    int startx = (int) floor(xf - 1);
240    xf = xf - floor(xf);
241    int maxx = width - 1;
242    int xs0 = rsMax(0, startx + 0);
243    int xs1 = rsMax(0, startx + 1);
244    int xs2 = rsMin(maxx, startx + 2);
245    int xs3 = rsMin(maxx, startx + 3);
246
247    float4 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
248                                  yp0[xs2], yp0[xs3], xf);
249    float4 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
250                                  yp1[xs2], yp1[xs3], xf);
251    float4 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
252                                  yp2[xs2], yp2[xs3], xf);
253    float4 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
254                                  yp3[xs2], yp3[xs3], xf);
255
256    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
257    return p;
258}
259
260static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
261                         float xf, float yf, int width) {
262    int startx = (int) floor(xf - 1);
263    xf = xf - floor(xf);
264    int maxx = width - 1;
265    int xs0 = rsMax(0, startx + 0);
266    int xs1 = rsMax(0, startx + 1);
267    int xs2 = rsMin(maxx, startx + 2);
268    int xs3 = rsMin(maxx, startx + 3);
269
270    float2 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
271                                  yp0[xs2], yp0[xs3], xf);
272    float2 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
273                                  yp1[xs2], yp1[xs3], xf);
274    float2 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
275                                  yp2[xs2], yp2[xs3], xf);
276    float2 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
277                                  yp3[xs2], yp3[xs3], xf);
278
279    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
280    return p;
281}
282
283static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
284                        float xf, float yf, int width) {
285    int startx = (int) floor(xf - 1);
286    xf = xf - floor(xf);
287    int maxx = width - 1;
288    int xs0 = rsMax(0, startx + 0);
289    int xs1 = rsMax(0, startx + 1);
290    int xs2 = rsMin(maxx, startx + 2);
291    int xs3 = rsMin(maxx, startx + 3);
292
293    float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
294                                 yp0[xs2], yp0[xs3], xf);
295    float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
296                                 yp1[xs2], yp1[xs3], xf);
297    float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
298                                 yp2[xs2], yp2[xs3], xf);
299    float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
300                                 yp3[xs2], yp3[xs3], xf);
301
302    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
303    return p;
304}
305
306void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
307                                                uint32_t xstart, uint32_t xend,
308                                                uint32_t outstep) {
309    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
310
311    if (!cp->mAlloc.get()) {
312        ALOGE("Resize executed without input, skipping");
313        return;
314    }
315    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
316    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
317    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
318    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
319
320    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
321    int starty = (int) floor(yf - 1);
322    yf = yf - floor(yf);
323    int maxy = srcHeight - 1;
324    int ys0 = rsMax(0, starty + 0);
325    int ys1 = rsMax(0, starty + 1);
326    int ys2 = rsMin(maxy, starty + 2);
327    int ys3 = rsMin(maxy, starty + 3);
328
329    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
330    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
331    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
332    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
333
334    uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
335    uint32_t x1 = xstart;
336    uint32_t x2 = xend;
337
338#if defined(ARCH_ARM_USE_INTRINSICS)
339    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
340        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
341        long xf16 = rint(xf * 0x10000);
342        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
343
344        int xoff = (xf16 >> 16) - 1;
345        int xclip = rsMax(0, xoff) - xoff;
346        int len = x2 - x1;
347
348        int32_t yr[4];
349        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
350        mkYCoeff(yr, yf);
351
352        xoff += xclip;
353
354        rsdIntrinsicResizeB4_K(
355                out, len,
356                xf16 & 0xffff, xinc16,
357                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
358                xclip, srcWidth - xoff + xclip,
359                osc_ctl, yr);
360        out += len;
361        x1 += len;
362    }
363#endif
364
365    while(x1 < x2) {
366        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
367        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
368        out++;
369        x1++;
370    }
371}
372
373void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
374                                                uint32_t xstart, uint32_t xend,
375                                                uint32_t outstep) {
376    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
377
378    if (!cp->mAlloc.get()) {
379        ALOGE("Resize executed without input, skipping");
380        return;
381    }
382    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
383    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
384    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
385    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
386
387    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
388    int starty = (int) floor(yf - 1);
389    yf = yf - floor(yf);
390    int maxy = srcHeight - 1;
391    int ys0 = rsMax(0, starty + 0);
392    int ys1 = rsMax(0, starty + 1);
393    int ys2 = rsMin(maxy, starty + 2);
394    int ys3 = rsMin(maxy, starty + 3);
395
396    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
397    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
398    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
399    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
400
401    uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
402    uint32_t x1 = xstart;
403    uint32_t x2 = xend;
404
405#if defined(ARCH_ARM_USE_INTRINSICS)
406    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
407        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
408        long xf16 = rint(xf * 0x10000);
409        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
410
411        int xoff = (xf16 >> 16) - 1;
412        int xclip = rsMax(0, xoff) - xoff;
413        int len = x2 - x1;
414
415        int32_t yr[4];
416        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
417        mkYCoeff(yr, yf);
418
419        xoff += xclip;
420
421        rsdIntrinsicResizeB2_K(
422                out, len,
423                xf16 & 0xffff, xinc16,
424                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
425                xclip, srcWidth - xoff + xclip,
426                osc_ctl, yr);
427        out += len;
428        x1 += len;
429    }
430#endif
431
432    while(x1 < x2) {
433        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
434        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
435        out++;
436        x1++;
437    }
438}
439
440void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
441                                                uint32_t xstart, uint32_t xend,
442                                                uint32_t outstep) {
443    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
444
445    if (!cp->mAlloc.get()) {
446        ALOGE("Resize executed without input, skipping");
447        return;
448    }
449    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
450    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
451    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
452    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
453
454    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
455    int starty = (int) floor(yf - 1);
456    yf = yf - floor(yf);
457    int maxy = srcHeight - 1;
458    int ys0 = rsMax(0, starty + 0);
459    int ys1 = rsMax(0, starty + 1);
460    int ys2 = rsMin(maxy, starty + 2);
461    int ys3 = rsMin(maxy, starty + 3);
462
463    const uchar *yp0 = pin + stride * ys0;
464    const uchar *yp1 = pin + stride * ys1;
465    const uchar *yp2 = pin + stride * ys2;
466    const uchar *yp3 = pin + stride * ys3;
467
468    uchar *out = ((uchar *)info->outPtr[0]) + xstart;
469    uint32_t x1 = xstart;
470    uint32_t x2 = xend;
471
472#if defined(ARCH_ARM_USE_INTRINSICS)
473    if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
474        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
475        long xf16 = rint(xf * 0x10000);
476        uint32_t xinc16 = rint(cp->scaleX * 0x10000);
477
478        int xoff = (xf16 >> 16) - 1;
479        int xclip = rsMax(0, xoff) - xoff;
480        int len = x2 - x1;
481
482        int32_t yr[4];
483        uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
484        mkYCoeff(yr, yf);
485
486        xoff += xclip;
487
488        rsdIntrinsicResizeB1_K(
489                out, len,
490                xf16 & 0xffff, xinc16,
491                yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
492                xclip, srcWidth - xoff + xclip,
493                osc_ctl, yr);
494        out += len;
495        x1 += len;
496    }
497#endif
498
499    while(x1 < x2) {
500        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
501        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
502        out++;
503        x1++;
504    }
505}
506
507void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
508                                                uint32_t xstart, uint32_t xend,
509                                                uint32_t outstep) {
510    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
511
512    if (!cp->mAlloc.get()) {
513        ALOGE("Resize executed without input, skipping");
514        return;
515    }
516    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
517    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
518    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
519    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
520
521    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
522    int starty = (int) floor(yf - 1);
523    yf = yf - floor(yf);
524    int maxy = srcHeight - 1;
525    int ys0 = rsMax(0, starty + 0);
526    int ys1 = rsMax(0, starty + 1);
527    int ys2 = rsMin(maxy, starty + 2);
528    int ys3 = rsMin(maxy, starty + 3);
529
530    const float4 *yp0 = (const float4 *)(pin + stride * ys0);
531    const float4 *yp1 = (const float4 *)(pin + stride * ys1);
532    const float4 *yp2 = (const float4 *)(pin + stride * ys2);
533    const float4 *yp3 = (const float4 *)(pin + stride * ys3);
534
535    float4 *out = ((float4 *)info->outPtr[0]) + xstart;
536    uint32_t x1 = xstart;
537    uint32_t x2 = xend;
538
539    while(x1 < x2) {
540        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
541        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
542        out++;
543        x1++;
544    }
545}
546
547void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
548                                                uint32_t xstart, uint32_t xend,
549                                                uint32_t outstep) {
550    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
551
552    if (!cp->mAlloc.get()) {
553        ALOGE("Resize executed without input, skipping");
554        return;
555    }
556    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
557    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
558    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
559    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
560
561    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
562    int starty = (int) floor(yf - 1);
563    yf = yf - floor(yf);
564    int maxy = srcHeight - 1;
565    int ys0 = rsMax(0, starty + 0);
566    int ys1 = rsMax(0, starty + 1);
567    int ys2 = rsMin(maxy, starty + 2);
568    int ys3 = rsMin(maxy, starty + 3);
569
570    const float2 *yp0 = (const float2 *)(pin + stride * ys0);
571    const float2 *yp1 = (const float2 *)(pin + stride * ys1);
572    const float2 *yp2 = (const float2 *)(pin + stride * ys2);
573    const float2 *yp3 = (const float2 *)(pin + stride * ys3);
574
575    float2 *out = ((float2 *)info->outPtr[0]) + xstart;
576    uint32_t x1 = xstart;
577    uint32_t x2 = xend;
578
579    while(x1 < x2) {
580        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
581        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
582        out++;
583        x1++;
584    }
585}
586
587void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
588                                                uint32_t xstart, uint32_t xend,
589                                                uint32_t outstep) {
590    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
591
592    if (!cp->mAlloc.get()) {
593        ALOGE("Resize executed without input, skipping");
594        return;
595    }
596    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
597    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
598    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
599    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
600
601    float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
602    int starty = (int) floor(yf - 1);
603    yf = yf - floor(yf);
604    int maxy = srcHeight - 1;
605    int ys0 = rsMax(0, starty + 0);
606    int ys1 = rsMax(0, starty + 1);
607    int ys2 = rsMin(maxy, starty + 2);
608    int ys3 = rsMin(maxy, starty + 3);
609
610    const float *yp0 = (const float *)(pin + stride * ys0);
611    const float *yp1 = (const float *)(pin + stride * ys1);
612    const float *yp2 = (const float *)(pin + stride * ys2);
613    const float *yp3 = (const float *)(pin + stride * ys3);
614
615    float *out = ((float *)info->outPtr[0]) + xstart;
616    uint32_t x1 = xstart;
617    uint32_t x2 = xend;
618
619    while(x1 < x2) {
620        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
621        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
622        out++;
623        x1++;
624    }
625}
626
627RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
628            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
629            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
630
631}
632
633RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
634}
635
636void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
637                                            const Allocation ** ains,
638                                            uint32_t inLen, Allocation * aout,
639                                            const void * usr, uint32_t usrLen,
640                                            const RsScriptCall *sc)
641{
642    if (!mAlloc.get()) {
643        ALOGE("Resize executed without input, skipping");
644        return;
645    }
646    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
647    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
648
649    //check the data type to determine F or U.
650    if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
651        switch(mAlloc->getType()->getElement()->getVectorSize()) {
652        case 1:
653            mRootPtr = &kernelU1;
654            break;
655        case 2:
656            mRootPtr = &kernelU2;
657            break;
658        case 3:
659        case 4:
660            mRootPtr = &kernelU4;
661            break;
662        }
663    } else {
664        switch(mAlloc->getType()->getElement()->getVectorSize()) {
665        case 1:
666            mRootPtr = &kernelF1;
667            break;
668        case 2:
669            mRootPtr = &kernelF2;
670            break;
671        case 3:
672        case 4:
673            mRootPtr = &kernelF4;
674            break;
675        }
676    }
677
678    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
679    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
680
681}
682
683void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
684    s->mHal.info.exportedVariableCount = 1;
685}
686
687void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
688    mAlloc.clear();
689}
690
691RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
692
693    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
694}
695
696} // namespace renderscript
697} // namespace android
698