1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsdCore.h"
19#include "rsdIntrinsics.h"
20#include "rsdAllocation.h"
21
22#include "rsdIntrinsicInlines.h"
23
24using namespace android;
25using namespace android::renderscript;
26
27struct ConvolveParams {
28    float fp[104];
29    short ip[104];
30    float radius;
31    int iradius;
32    void **scratch;
33    size_t *scratchSize;
34    ObjectBaseRef<Allocation> alloc;
35};
36
37static void ComputeGaussianWeights(ConvolveParams *cp) {
38    // Compute gaussian weights for the blur
39    // e is the euler's number
40    float e = 2.718281828459045f;
41    float pi = 3.1415926535897932f;
42    // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 )
43    // x is of the form [-radius .. 0 .. radius]
44    // and sigma varies with radius.
45    // Based on some experimental radius values and sigma's
46    // we approximately fit sigma = f(radius) as
47    // sigma = radius * 0.4  + 0.6
48    // The larger the radius gets, the more our gaussian blur
49    // will resemble a box blur since with large sigma
50    // the gaussian curve begins to lose its shape
51    float sigma = 0.4f * cp->radius + 0.6f;
52
53    // Now compute the coefficients. We will store some redundant values to save
54    // some math during the blur calculations precompute some values
55    float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma);
56    float coeff2 = - 1.0f / (2.0f * sigma * sigma);
57
58    float normalizeFactor = 0.0f;
59    float floatR = 0.0f;
60    int r;
61    cp->iradius = (float)ceil(cp->radius) + 0.5f;
62    for (r = -cp->iradius; r <= cp->iradius; r ++) {
63        floatR = (float)r;
64        cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2);
65        normalizeFactor += cp->fp[r + cp->iradius];
66    }
67
68    //Now we need to normalize the weights because all our coefficients need to add up to one
69    normalizeFactor = 1.0f / normalizeFactor;
70    for (r = -cp->iradius; r <= cp->iradius; r ++) {
71        cp->fp[r + cp->iradius] *= normalizeFactor;
72        cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768);
73    }
74}
75
76static void Blur_Bind(const Context *dc, const Script *script,
77                             void * intrinsicData, uint32_t slot, Allocation *data) {
78    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
79    rsAssert(slot == 1);
80    cp->alloc.set(data);
81}
82
83static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData,
84                               uint32_t slot, void *data, size_t dataLength) {
85    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
86    rsAssert(slot == 0);
87
88    cp->radius = ((const float *)data)[0];
89    ComputeGaussianWeights(cp);
90}
91
92
93
94static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
95                 const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
96
97    const uchar *pi = ptrIn + x*4;
98
99    float4 blurredPixel = 0;
100    for (int r = -iradius; r <= iradius; r ++) {
101        int validY = rsMax((y + r), 0);
102        validY = rsMin(validY, (int)(p->dimY - 1));
103        const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
104        float4 pf = convert_float4(pvy[0]);
105        blurredPixel += pf * gPtr[0];
106        gPtr++;
107    }
108
109    out->xyzw = blurredPixel;
110}
111
// Kernels with C linkage that are defined outside this file. They are only
// called when ARCH_ARM_HAVE_NEON is defined.

// Vertical pass; OneVF() hands it an even number of pixels [x1, x2).
extern "C" void rsdIntrinsicBlurVF_K(void *dst,
                                     const void *pin,
                                     int stride,
                                     const void *gptr,
                                     int rct,
                                     int x1,
                                     int x2);

// Horizontal pass; Blur_uchar4() calls it for the middle of a row.
extern "C" void rsdIntrinsicBlurHF_K(void *dst,
                                     const void *pin,
                                     const void *gptr,
                                     int rct,
                                     int x1,
                                     int x2);
114
115static void OneVF(float4 *out,
116                  const uchar *ptrIn, int iStride, const float* gPtr, int ct,
117                  int x1, int x2) {
118
119#if defined(ARCH_ARM_HAVE_NEON)
120    {
121        int t = (x2 - x1);
122        t &= ~1;
123        if(t) {
124            rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t);
125        }
126        x1 += t;
127    }
128#endif
129
130    while(x2 > x1) {
131        const uchar *pi = ptrIn + x1 * 4;
132        float4 blurredPixel = 0;
133        const float* gp = gPtr;
134
135        for (int r = 0; r < ct; r++) {
136            float4 pf = convert_float4(((const uchar4 *)pi)[0]);
137            blurredPixel += pf * gp[0];
138            pi += iStride;
139            gp++;
140        }
141        out->xyzw = blurredPixel;
142        x1++;
143        out++;
144        gPtr++;
145    }
146}
147
148static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
149                const float4 *ptrIn, const float* gPtr, int iradius) {
150
151    float4 blurredPixel = 0;
152    for (int r = -iradius; r <= iradius; r ++) {
153        int validX = rsMax((x + r), 0);
154        validX = rsMin(validX, (int)(p->dimX - 1));
155        float4 pf = ptrIn[validX];
156        blurredPixel += pf * gPtr[0];
157        gPtr++;
158    }
159
160    out->xyzw = convert_uchar4(blurredPixel);
161}
162
163
164static void Blur_uchar4(const RsForEachStubParamStruct *p,
165                                    uint32_t xstart, uint32_t xend,
166                                    uint32_t instep, uint32_t outstep) {
167    float stackbuf[4 * 2048];
168    float *buf = &stackbuf[0];
169    ConvolveParams *cp = (ConvolveParams *)p->usr;
170    if (!cp->alloc.get()) {
171        ALOGE("Blur executed without input, skipping");
172        return;
173    }
174    DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
175    const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
176
177    uchar4 *out = (uchar4 *)p->out;
178    uint32_t x1 = xstart;
179    uint32_t x2 = xend;
180
181    if (p->dimX > 2048) {
182        if ((p->dimX > cp->scratchSize[p->lid]) || !cp->scratch[p->lid]) {
183            cp->scratch[p->lid] = realloc(cp->scratch[p->lid], p->dimX * 16);
184            cp->scratchSize[p->lid] = p->dimX;
185        }
186        buf = (float *)cp->scratch[p->lid];
187    }
188    float4 *fout = (float4 *)buf;
189
190    int y = p->y;
191    uint32_t vx1 = x1;
192    uint32_t vx2 = x2;
193
194    if (vx1 > (uint32_t)cp->iradius) {
195        vx1 -= cp->iradius;
196    } else {
197        vx1 = 0;
198    }
199    vx2 += cp->iradius;
200    if (vx2 >= p->dimX) {
201        vx2 = p->dimX - 1;
202    }
203
204    if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) {
205        const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride;
206        OneVF(fout + vx1, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, vx1, vx2);
207    } else {
208        while(vx2 > vx1) {
209            OneV(p, fout, vx1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
210            fout++;
211            vx1++;
212        }
213    }
214
215    x1 = xstart;
216    while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) {
217        OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
218        out++;
219        x1++;
220    }
221#if defined(ARCH_ARM_HAVE_NEON)
222    if ((x1 + cp->iradius) < x2) {
223        rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius);
224        out += (x2 - cp->iradius) - x1;
225        x1 = x2 - cp->iradius;
226    }
227#endif
228    while(x2 > x1) {
229        OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
230        out++;
231        x1++;
232    }
233
234}
235
236static void Destroy(const Context *rsc, const Script *script, void * intrinsicData) {
237    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
238    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
239
240    if (cp) {
241        if (cp->scratch) {
242            for (size_t i = 0; i < dc->mWorkers.mCount + 1; i++) {
243                if (cp->scratch[i]) {
244                    free(cp->scratch[i]);
245                }
246            }
247            free(cp->scratch);
248        }
249        if (cp->scratchSize) {
250            free(cp->scratchSize);
251        }
252        free(cp);
253    }
254}
255
256void * rsdIntrinsic_InitBlur(const android::renderscript::Context *rsc,
257                                    android::renderscript::Script *script,
258                                    RsdIntriniscFuncs_t *funcs) {
259
260    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
261
262    script->mHal.info.exportedVariableCount = 2;
263    funcs->setVarObj = Blur_Bind;
264    funcs->setVar = Blur_SetVar;
265    funcs->root = Blur_uchar4;
266    funcs->destroy = Destroy;
267
268    ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
269    if (!cp) {
270        return NULL;
271    }
272
273    cp->radius = 5;
274    cp->scratch = (void **)calloc(dc->mWorkers.mCount + 1, sizeof(void *));
275    cp->scratchSize = (size_t *)calloc(dc->mWorkers.mCount + 1, sizeof(size_t));
276    if (!cp->scratch || !cp->scratchSize) {
277        Destroy(rsc, script, cp);
278        return NULL;
279    }
280
281    ComputeGaussianWeights(cp);
282    return cp;
283}
284
285
286