rsCpuIntrinsicResize.cpp revision 9ed79105cc6a8dbfaf959875249f36022cc2c798
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
29public:
30    virtual void populateScript(Script *);
31    virtual void invokeFreeChildren();
32
33    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35    virtual ~RsdCpuScriptIntrinsicResize();
36    RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
37
38    virtual void preLaunch(uint32_t slot, const Allocation ** ains,
39                           uint32_t inLen, Allocation * aout, const void * usr,
40                           uint32_t usrLen, const RsScriptCall *sc);
41
42    float scaleX;
43    float scaleY;
44
45protected:
46    ObjectBaseRef<const Allocation> mAlloc;
47    ObjectBaseRef<const Element> mElement;
48
49    static void kernelU1(const RsExpandKernelParams *p,
50                         uint32_t xstart, uint32_t xend,
51                         uint32_t outstep);
52    static void kernelU2(const RsExpandKernelParams *p,
53                         uint32_t xstart, uint32_t xend,
54                         uint32_t outstep);
55    static void kernelU4(const RsExpandKernelParams *p,
56                         uint32_t xstart, uint32_t xend,
57                         uint32_t outstep);
58};
59
60}
61}
62
63
64void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
65    rsAssert(slot == 0);
66    mAlloc.set(static_cast<Allocation *>(data));
67}
68
69
// C-linkage declaration of rsdIntrinsicConvolve3x3_K.
// NOTE(review): nothing in this file references it; it looks like a leftover
// from another intrinsic - confirm before removing.
extern "C" void rsdIntrinsicConvolve3x3_K(void *dst,
                                          const void *y0,
                                          const void *y1,
                                          const void *y2,
                                          const short *coef,
                                          uint32_t count);
72
73static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
74    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
75            + x * (3.f * (p1 - p2) + p3 - p0)));
76}
77
78static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
79    return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
80            + x * (3.f * (p1 - p2) + p3 - p0)));
81}
82
// Catmull-Rom cubic through four consecutive scalar samples.
// x is the fractional position between p1 (x == 0) and p2 (x == 1).
static float cubicInterpolate(float p0, float p1, float p2, float p3, float x) {
    // Horner form, innermost term first; the grouping mirrors the single
    // expression this was split from so rounding is unchanged.
    const float cubicTerm = 3.f * (p1 - p2) + p3 - p0;
    const float quadTerm = 2.f * p0 - 5.f * p1 + 4.f * p2 - p3 + x * cubicTerm;
    const float linearTerm = p2 - p0 + x * quadTerm;
    return p1 + 0.5f * x * linearTerm;
}
87
88static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
89                         float xf, float yf, int width) {
90    int startx = (int) floor(xf - 2);
91    xf = xf - floor(xf);
92    int maxx = width - 1;
93    int xs0 = rsMax(0, startx + 0);
94    int xs1 = rsMax(0, startx + 1);
95    int xs2 = rsMin(maxx, startx + 2);
96    int xs3 = rsMin(maxx, startx + 3);
97
98    float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
99                                  convert_float4(yp0[xs1]),
100                                  convert_float4(yp0[xs2]),
101                                  convert_float4(yp0[xs3]), xf);
102
103    float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
104                                  convert_float4(yp1[xs1]),
105                                  convert_float4(yp1[xs2]),
106                                  convert_float4(yp1[xs3]), xf);
107
108    float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
109                                  convert_float4(yp2[xs1]),
110                                  convert_float4(yp2[xs2]),
111                                  convert_float4(yp2[xs3]), xf);
112
113    float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
114                                  convert_float4(yp3[xs1]),
115                                  convert_float4(yp3[xs2]),
116                                  convert_float4(yp3[xs3]), xf);
117
118    float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
119    p = clamp(p, 0.f, 255.f);
120    return convert_uchar4(p);
121}
122
123static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
124                         float xf, float yf, int width) {
125    int startx = (int) floor(xf - 2);
126    xf = xf - floor(xf);
127    int maxx = width - 1;
128    int xs0 = rsMax(0, startx + 0);
129    int xs1 = rsMax(0, startx + 1);
130    int xs2 = rsMin(maxx, startx + 2);
131    int xs3 = rsMin(maxx, startx + 3);
132
133    float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
134                                  convert_float2(yp0[xs1]),
135                                  convert_float2(yp0[xs2]),
136                                  convert_float2(yp0[xs3]), xf);
137
138    float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
139                                  convert_float2(yp1[xs1]),
140                                  convert_float2(yp1[xs2]),
141                                  convert_float2(yp1[xs3]), xf);
142
143    float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
144                                  convert_float2(yp2[xs1]),
145                                  convert_float2(yp2[xs2]),
146                                  convert_float2(yp2[xs3]), xf);
147
148    float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
149                                  convert_float2(yp3[xs1]),
150                                  convert_float2(yp3[xs2]),
151                                  convert_float2(yp3[xs3]), xf);
152
153    float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
154    p = clamp(p, 0.f, 255.f);
155    return convert_uchar2(p);
156}
157
158static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
159                        float xf, float yf, int width) {
160    int startx = (int) floor(xf - 2);
161    xf = xf - floor(xf);
162    int maxx = width - 1;
163    int xs0 = rsMax(0, startx + 0);
164    int xs1 = rsMax(0, startx + 1);
165    int xs2 = rsMin(maxx, startx + 2);
166    int xs3 = rsMin(maxx, startx + 3);
167
168    float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
169                                 (float)yp0[xs2], (float)yp0[xs3], xf);
170    float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
171                                 (float)yp1[xs2], (float)yp1[xs3], xf);
172    float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
173                                 (float)yp2[xs2], (float)yp2[xs3], xf);
174    float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
175                                 (float)yp3[xs2], (float)yp3[xs3], xf);
176
177    float p  = cubicInterpolate(p0, p1, p2, p3, yf);
178    p = clamp(p, 0.f, 255.f);
179    return (uchar)p;
180}
181
182void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
183                                                uint32_t xstart, uint32_t xend,
184                                                uint32_t outstep) {
185    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
186
187    if (!cp->mAlloc.get()) {
188        ALOGE("Resize executed without input, skipping");
189        return;
190    }
191    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
192    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
193    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
194    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
195
196    float yf = p->y * cp->scaleY;
197    int starty = (int) floor(yf - 2);
198    yf = yf - floor(yf);
199    int maxy = srcHeight - 1;
200    int ys0 = rsMax(0, starty + 0);
201    int ys1 = rsMax(0, starty + 1);
202    int ys2 = rsMin(maxy, starty + 2);
203    int ys3 = rsMin(maxy, starty + 3);
204
205    const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
206    const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
207    const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
208    const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
209
210    uchar4 *out = ((uchar4 *)p->out) + xstart;
211    uint32_t x1 = xstart;
212    uint32_t x2 = xend;
213
214    while(x1 < x2) {
215        float xf = x1 * cp->scaleX;
216        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
217        out++;
218        x1++;
219    }
220}
221
222void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
223                                                uint32_t xstart, uint32_t xend,
224                                                uint32_t outstep) {
225    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
226
227    if (!cp->mAlloc.get()) {
228        ALOGE("Resize executed without input, skipping");
229        return;
230    }
231    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
232    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
233    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
234    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
235
236    float yf = p->y * cp->scaleY;
237    int starty = (int) floor(yf - 2);
238    yf = yf - floor(yf);
239    int maxy = srcHeight - 1;
240    int ys0 = rsMax(0, starty + 0);
241    int ys1 = rsMax(0, starty + 1);
242    int ys2 = rsMin(maxy, starty + 2);
243    int ys3 = rsMin(maxy, starty + 3);
244
245    const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
246    const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
247    const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
248    const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
249
250    uchar2 *out = ((uchar2 *)p->out) + xstart;
251    uint32_t x1 = xstart;
252    uint32_t x2 = xend;
253
254    while(x1 < x2) {
255        float xf = x1 * cp->scaleX;
256        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
257        out++;
258        x1++;
259    }
260}
261
262void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
263                                                uint32_t xstart, uint32_t xend,
264                                                uint32_t outstep) {
265    RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
266
267    if (!cp->mAlloc.get()) {
268        ALOGE("Resize executed without input, skipping");
269        return;
270    }
271    const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
272    const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
273    const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
274    const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
275
276    float yf = p->y * cp->scaleY;
277    int starty = (int) floor(yf - 2);
278    yf = yf - floor(yf);
279    int maxy = srcHeight - 1;
280    int ys0 = rsMax(0, starty + 0);
281    int ys1 = rsMax(0, starty + 1);
282    int ys2 = rsMin(maxy, starty + 2);
283    int ys3 = rsMin(maxy, starty + 3);
284
285    const uchar *yp0 = pin + stride * ys0;
286    const uchar *yp1 = pin + stride * ys1;
287    const uchar *yp2 = pin + stride * ys2;
288    const uchar *yp3 = pin + stride * ys3;
289
290    uchar *out = ((uchar *)p->out) + xstart;
291    uint32_t x1 = xstart;
292    uint32_t x2 = xend;
293
294    while(x1 < x2) {
295        float xf = x1 * cp->scaleX;
296        *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
297        out++;
298        x1++;
299    }
300}
301
302RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
303            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
304            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
305
306}
307
308RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
309}
310
311void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
312                                            const Allocation ** ains,
313                                            uint32_t inLen, Allocation * aout,
314                                            const void * usr, uint32_t usrLen,
315                                            const RsScriptCall *sc)
316{
317    if (!mAlloc.get()) {
318        ALOGE("Resize executed without input, skipping");
319        return;
320    }
321    const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
322    const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
323    const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
324
325    switch(mAlloc->getType()->getElement()->getVectorSize()) {
326    case 1:
327        mRootPtr = &kernelU1;
328        break;
329    case 2:
330        mRootPtr = &kernelU2;
331        break;
332    case 3:
333    case 4:
334        mRootPtr = &kernelU4;
335        break;
336    }
337
338    scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
339    scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
340
341}
342
343void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
344    s->mHal.info.exportedVariableCount = 1;
345}
346
347void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
348    mAlloc.clear();
349}
350
351
352RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
353
354    return new RsdCpuScriptIntrinsicResize(ctx, s, e);
355}
356