1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
29public:
30    virtual void populateScript(Script *);
31    virtual void invokeFreeChildren();
32
33    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35    virtual ~RsdCpuScriptIntrinsicResize();
36    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
37
38    virtual void preLaunch(uint32_t slot, const Allocation * ain,
39                           Allocation * aout, const void * usr,
40                           uint32_t usrLen, const RsScriptCall *sc);
41
42    float scaleX;
43    float scaleY;
44
45protected:
46    ObjectBaseRef<const Allocation> mAlloc;
47    ObjectBaseRef<const Element> mElement;
48
49    static void kernelU1(const RsForEachStubParamStruct *p,
50                         uint32_t xstart, uint32_t xend,
51                         uint32_t instep, uint32_t outstep);
52    static void kernelU2(const RsForEachStubParamStruct *p,
53                         uint32_t xstart, uint32_t xend,
54                         uint32_t instep, uint32_t outstep);
55    static void kernelU4(const RsForEachStubParamStruct *p,
56                         uint32_t xstart, uint32_t xend,
57                         uint32_t instep, uint32_t outstep);
58};
59
60}
61}
62
63
64void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
65    rsAssert(slot == 0);
66    mAlloc.set(static_cast<Allocation *>(data));
67}
68
69static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
70    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
71            + x * (3.f * (p1 - p2) + p3 - p0)));
72}
73
74static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
75    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
76            + x * (3.f * (p1 - p2) + p3 - p0)));
77}
78
/**
 * Cubic (Catmull-Rom form) interpolation between p1 and p2 for scalar samples.
 *
 * p0..p3 are four consecutive samples and x is the fractional position
 * between p1 (x == 0) and p2 (x == 1).
 */
static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
    // Built from the innermost term outwards, in the same evaluation order as
    // the single nested expression this replaces.
    const float cubicTerm = 3.f * (p1 - p2) + p3 - p0;
    const float quadTerm = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * cubicTerm;
    const float linearTerm = p2 - p0 + x * quadTerm;
    return p1 + 0.5f * x * linearTerm;
}
83
84static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
85                         float xf, float yf, int width) {
86    int startx = (int) floor(xf - 1);
87    xf = xf - floor(xf);
88    int maxx = width - 1;
89    int xs0 = rsMax(0, startx + 0);
90    int xs1 = rsMax(0, startx + 1);
91    int xs2 = rsMin(maxx, startx + 2);
92    int xs3 = rsMin(maxx, startx + 3);
93
94    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
95                                  convert_float4(yp0[xs1]),
96                                  convert_float4(yp0[xs2]),
97                                  convert_float4(yp0[xs3]), xf);
98
99    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
100                                  convert_float4(yp1[xs1]),
101                                  convert_float4(yp1[xs2]),
102                                  convert_float4(yp1[xs3]), xf);
103
104    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
105                                  convert_float4(yp2[xs1]),
106                                  convert_float4(yp2[xs2]),
107                                  convert_float4(yp2[xs3]), xf);
108
109    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
110                                  convert_float4(yp3[xs1]),
111                                  convert_float4(yp3[xs2]),
112                                  convert_float4(yp3[xs3]), xf);
113
114    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
115    p = clamp(p + 0.5f, 0.f, 255.f);
116    return convert_uchar4(p);
117}
118
119static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
120                         float xf, float yf, int width) {
121    int startx = (int) floor(xf - 1);
122    xf = xf - floor(xf);
123    int maxx = width - 1;
124    int xs0 = rsMax(0, startx + 0);
125    int xs1 = rsMax(0, startx + 1);
126    int xs2 = rsMin(maxx, startx + 2);
127    int xs3 = rsMin(maxx, startx + 3);
128
129    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
130                                  convert_float2(yp0[xs1]),
131                                  convert_float2(yp0[xs2]),
132                                  convert_float2(yp0[xs3]), xf);
133
134    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
135                                  convert_float2(yp1[xs1]),
136                                  convert_float2(yp1[xs2]),
137                                  convert_float2(yp1[xs3]), xf);
138
139    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
140                                  convert_float2(yp2[xs1]),
141                                  convert_float2(yp2[xs2]),
142                                  convert_float2(yp2[xs3]), xf);
143
144    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
145                                  convert_float2(yp3[xs1]),
146                                  convert_float2(yp3[xs2]),
147                                  convert_float2(yp3[xs3]), xf);
148
149    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
150    p = clamp(p + 0.5f, 0.f, 255.f);
151    return convert_uchar2(p);
152}
153
154static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
155                        float xf, float yf, int width) {
156    int startx = (int) floor(xf - 1);
157    xf = xf - floor(xf);
158    int maxx = width - 1;
159    int xs0 = rsMax(0, startx + 0);
160    int xs1 = rsMax(0, startx + 1);
161    int xs2 = rsMin(maxx, startx + 2);
162    int xs3 = rsMin(maxx, startx + 3);
163
164    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
165                                 (float)yp0[xs2], (float)yp0[xs3], xf);
166    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
167                                 (float)yp1[xs2], (float)yp1[xs3], xf);
168    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
169                                 (float)yp2[xs2], (float)yp2[xs3], xf);
170    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
171                                 (float)yp3[xs2], (float)yp3[xs3], xf);
172
173    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
174    p = clamp(p + 0.5f, 0.f, 255.f);
175    return (uchar)p;
176}
177
178void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
179                                                uint32_t xstart, uint32_t xend,
180                                                uint32_t instep, uint32_t outstep) {
181    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
182
183    if (!cp->mAlloc.get()) {
184        ALOGE("Resize executed without input, skipping");
185        return;
186    }
187    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
188    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
189    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
190    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
191
192    float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
193    int starty = (int) floor(yf - 1);
194    yf = yf - floor(yf);
195    int maxy = srcHeight - 1;
196    int ys0 = rsMax(0, starty + 0);
197    int ys1 = rsMax(0, starty + 1);
198    int ys2 = rsMin(maxy, starty + 2);
199    int ys3 = rsMin(maxy, starty + 3);
200
201    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
202    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
203    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
204    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
205
206    uchar4 *out = ((uchar4 *)p->out) + xstart;
207    uint32_t x1 = xstart;
208    uint32_t x2 = xend;
209
210    while(x1 < x2) {
211        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
212        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
213        out++;
214        x1++;
215    }
216}
217
218void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
219                                                uint32_t xstart, uint32_t xend,
220                                                uint32_t instep, uint32_t outstep) {
221    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
222
223    if (!cp->mAlloc.get()) {
224        ALOGE("Resize executed without input, skipping");
225        return;
226    }
227    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
228    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
229    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
230    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
231
232    float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
233    int starty = (int) floor(yf - 1);
234    yf = yf - floor(yf);
235    int maxy = srcHeight - 1;
236    int ys0 = rsMax(0, starty + 0);
237    int ys1 = rsMax(0, starty + 1);
238    int ys2 = rsMin(maxy, starty + 2);
239    int ys3 = rsMin(maxy, starty + 3);
240
241    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
242    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
243    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
244    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
245
246    uchar2 *out = ((uchar2 *)p->out) + xstart;
247    uint32_t x1 = xstart;
248    uint32_t x2 = xend;
249
250    while(x1 < x2) {
251        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
252        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
253        out++;
254        x1++;
255    }
256}
257
258void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
259                                                uint32_t xstart, uint32_t xend,
260                                                uint32_t instep, uint32_t outstep) {
261    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
262
263    if (!cp->mAlloc.get()) {
264        ALOGE("Resize executed without input, skipping");
265        return;
266    }
267    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
268    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
269    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
270    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
271
272    float yf = (p->y + 0.5f) * cp->scaleY - 0.5f;
273    int starty = (int) floor(yf - 1);
274    yf = yf - floor(yf);
275    int maxy = srcHeight - 1;
276    int ys0 = rsMax(0, starty + 0);
277    int ys1 = rsMax(0, starty + 1);
278    int ys2 = rsMin(maxy, starty + 2);
279    int ys3 = rsMin(maxy, starty + 3);
280
281    const uchar *yp0 = pin + stride * ys0;
282    const uchar *yp1 = pin + stride * ys1;
283    const uchar *yp2 = pin + stride * ys2;
284    const uchar *yp3 = pin + stride * ys3;
285
286    uchar *out = ((uchar *)p->out) + xstart;
287    uint32_t x1 = xstart;
288    uint32_t x2 = xend;
289
290    while(x1 < x2) {
291        float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
292        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
293        out++;
294        x1++;
295    }
296}
297
298RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
299            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
300            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
301
302}
303
304RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
305}
306
307void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
308                                            Allocation * aout, const void * usr,
309                                            uint32_t usrLen, const RsScriptCall *sc)
310{
311    if (!mAlloc.get()) {
312        ALOGE("Resize executed without input, skipping");
313        return;
314    }
315    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
316    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
317    const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
318
319    switch(mAlloc->getType()->getElement()->getVectorSize()) {
320    case 1:
321        mRootPtr = &kernelU1;
322        break;
323    case 2:
324        mRootPtr = &kernelU2;
325        break;
326    case 3:
327    case 4:
328        mRootPtr = &kernelU4;
329        break;
330    }
331
332    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
333    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
334
335}
336
337void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
338    s->mHal.info.exportedVariableCount = 1;
339}
340
341void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
342    mAlloc.clear();
343}
344
345
346RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
347
348    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
349}
350
351
352