1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/*
2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project
3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams *
4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License");
5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License.
6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at
7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams *
8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams *      http://www.apache.org/licenses/LICENSE-2.0
9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams *
10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software
11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS,
12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and
14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License.
15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */
16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
17d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
18d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams#include "rsdCore.h"
19d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams#include "rsdIntrinsics.h"
20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams#include "rsdAllocation.h"
21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams#include "rsdIntrinsicInlines.h"
23d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
24d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android;
25d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript;
26d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
27d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstruct ConvolveParams {
28d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float fp[104];
29d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    short ip[104];
30d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float radius;
31d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    int iradius;
32fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    void **scratch;
33fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    size_t *scratchSize;
34d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ObjectBaseRef<Allocation> alloc;
35d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams};
36d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
37d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void ComputeGaussianWeights(ConvolveParams *cp) {
38d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // Compute gaussian weights for the blur
39d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // e is the euler's number
40d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float e = 2.718281828459045f;
41d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float pi = 3.1415926535897932f;
42d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 )
43d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // x is of the form [-radius .. 0 .. radius]
44d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // and sigma varies with radius.
45d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // Based on some experimental radius values and sigma's
46d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // we approximately fit sigma = f(radius) as
47d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // sigma = radius * 0.4  + 0.6
48d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // The larger the radius gets, the more our gaussian blur
49d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // will resemble a box blur since with large sigma
50d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // the gaussian curve begins to lose its shape
51d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float sigma = 0.4f * cp->radius + 0.6f;
52d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
53d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // Now compute the coefficients. We will store some redundant values to save
54d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    // some math during the blur calculations precompute some values
55d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma);
56d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float coeff2 = - 1.0f / (2.0f * sigma * sigma);
57d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
58d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float normalizeFactor = 0.0f;
59d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float floatR = 0.0f;
60d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    int r;
61d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    cp->iradius = (float)ceil(cp->radius) + 0.5f;
62d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    for (r = -cp->iradius; r <= cp->iradius; r ++) {
63d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        floatR = (float)r;
64d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2);
65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        normalizeFactor += cp->fp[r + cp->iradius];
66d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
67d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    //Now we need to normalize the weights because all our coefficients need to add up to one
69d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    normalizeFactor = 1.0f / normalizeFactor;
70d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    for (r = -cp->iradius; r <= cp->iradius; r ++) {
71d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        cp->fp[r + cp->iradius] *= normalizeFactor;
72d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768);
73d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
75d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
76d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void Blur_Bind(const Context *dc, const Script *script,
77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                             void * intrinsicData, uint32_t slot, Allocation *data) {
78d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    rsAssert(slot == 1);
80d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    cp->alloc.set(data);
81d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
82d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
83d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData,
84d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                               uint32_t slot, void *data, size_t dataLength) {
85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    rsAssert(slot == 0);
87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
88d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    cp->radius = ((const float *)data)[0];
89d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ComputeGaussianWeights(cp);
90d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
91d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
92d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y,
95d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                 const uchar *ptrIn, int iStride, const float* gPtr, int iradius) {
96d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
97d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    const uchar *pi = ptrIn + x*4;
98d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
99d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float4 blurredPixel = 0;
100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    for (int r = -iradius; r <= iradius; r ++) {
101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        int validY = rsMax((y + r), 0);
102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        validY = rsMin(validY, (int)(p->dimY - 1));
103d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride];
104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        float4 pf = convert_float4(pvy[0]);
105d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        blurredPixel += pf * gPtr[0];
106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        gPtr++;
107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
108d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    out->xyzw = blurredPixel;
110d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
111d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
// Blur kernels with C linkage that are defined outside this file. They are
// only called when ARCH_ARM_HAVE_NEON is defined.
extern "C" {
void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2);
void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2);
}
114e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams
115e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Samsstatic void OneVF(float4 *out,
116e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams                  const uchar *ptrIn, int iStride, const float* gPtr, int ct,
117e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams                  int x1, int x2) {
118e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams
119e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams#if defined(ARCH_ARM_HAVE_NEON)
1202207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams    {
1212207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams        int t = (x2 - x1);
1222207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams        t &= ~1;
1232207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams        if(t) {
1242207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams            rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t);
1252207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams        }
1262207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams        x1 += t;
1272207ab7e0f2d28382fe61ff002ddd58c4fa3fb99Jason Sams    }
128e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams#endif
129e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams
130e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    while(x2 > x1) {
131e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        const uchar *pi = ptrIn + x1 * 4;
132e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        float4 blurredPixel = 0;
133e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        const float* gp = gPtr;
134e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams
135e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        for (int r = 0; r < ct; r++) {
136e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams            float4 pf = convert_float4(((const uchar4 *)pi)[0]);
137e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams            blurredPixel += pf * gp[0];
138e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams            pi += iStride;
139e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams            gp++;
140e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        }
141e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        out->xyzw = blurredPixel;
142e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        x1++;
143e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        out++;
144fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        gPtr++;
145e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    }
146e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams}
147e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams
148d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x,
149d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                const float4 *ptrIn, const float* gPtr, int iradius) {
150d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
151d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float4 blurredPixel = 0;
152d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    for (int r = -iradius; r <= iradius; r ++) {
153d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        int validX = rsMax((x + r), 0);
154d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        validX = rsMin(validX, (int)(p->dimX - 1));
155d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        float4 pf = ptrIn[validX];
156d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        blurredPixel += pf * gPtr[0];
157d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        gPtr++;
158d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
159d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
160d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    out->xyzw = convert_uchar4(blurredPixel);
161d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
162d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
163d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
164d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsstatic void Blur_uchar4(const RsForEachStubParamStruct *p,
165d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                                    uint32_t xstart, uint32_t xend,
166d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                                    uint32_t instep, uint32_t outstep) {
167fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    float stackbuf[4 * 2048];
168fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    float *buf = &stackbuf[0];
169d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ConvolveParams *cp = (ConvolveParams *)p->usr;
170b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams    if (!cp->alloc.get()) {
171b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams        ALOGE("Blur executed without input, skipping");
172b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams        return;
173b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams    }
174d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv;
175d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    const uchar *pin = (const uchar *)din->lod[0].mallocPtr;
176d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
177d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    uchar4 *out = (uchar4 *)p->out;
178d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    uint32_t x1 = xstart;
179d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    uint32_t x2 = xend;
180d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
181fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    if (p->dimX > 2048) {
182fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        if ((p->dimX > cp->scratchSize[p->lid]) || !cp->scratch[p->lid]) {
183fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            cp->scratch[p->lid] = realloc(cp->scratch[p->lid], p->dimX * 16);
184fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            cp->scratchSize[p->lid] = p->dimX;
185fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        }
186fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        buf = (float *)cp->scratch[p->lid];
187fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    }
188d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    float4 *fout = (float4 *)buf;
189fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
190e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    int y = p->y;
191c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    uint32_t vx1 = x1;
192c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    uint32_t vx2 = x2;
193c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams
194c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    if (vx1 > (uint32_t)cp->iradius) {
195c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams        vx1 -= cp->iradius;
196c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    } else {
197c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams        vx1 = 0;
198c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    }
199c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    vx2 += cp->iradius;
200c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    if (vx2 >= p->dimX) {
201c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams        vx2 = p->dimX - 1;
202c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams    }
203c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams
204e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) {
205e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride;
206c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams        OneVF(fout + vx1, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, vx1, vx2);
207e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    } else {
208c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams        while(vx2 > vx1) {
209c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams            OneV(p, fout, vx1, y, pin, din->lod[0].stride, cp->fp, cp->iradius);
210e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams            fout++;
211c4b6b3af8ea67c6ce86a4e44e9432f2887107cffJason Sams            vx1++;
212e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        }
213d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
214d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
215d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    x1 = xstart;
216e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) {
217e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
218e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        out++;
219e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        x1++;
220e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    }
221e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams#if defined(ARCH_ARM_HAVE_NEON)
222e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    if ((x1 + cp->iradius) < x2) {
223e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius);
224e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        out += (x2 - cp->iradius) - x1;
225e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams        x1 = x2 - cp->iradius;
226e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams    }
227e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams#endif
228d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    while(x2 > x1) {
229d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius);
230d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        out++;
231d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams        x1++;
232d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    }
233d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
234d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
235d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
236fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Samsstatic void Destroy(const Context *rsc, const Script *script, void * intrinsicData) {
237fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
238fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    ConvolveParams *cp = (ConvolveParams *)intrinsicData;
239fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
240fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    if (cp) {
241fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        if (cp->scratch) {
242fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            for (size_t i = 0; i < dc->mWorkers.mCount + 1; i++) {
243fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams                if (cp->scratch[i]) {
244fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams                    free(cp->scratch[i]);
245fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams                }
246fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            }
247fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            free(cp->scratch);
248fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        }
249fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        if (cp->scratchSize) {
250fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams            free(cp->scratchSize);
251fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        }
252fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        free(cp);
253fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    }
254fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams}
255fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
256fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Samsvoid * rsdIntrinsic_InitBlur(const android::renderscript::Context *rsc,
257d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                                    android::renderscript::Script *script,
258d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams                                    RsdIntriniscFuncs_t *funcs) {
259d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
260fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
261fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
262d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    script->mHal.info.exportedVariableCount = 2;
26378b050ea1e13108110ce3b8ead63252e5ebe2468Jason Sams    funcs->setVarObj = Blur_Bind;
264d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    funcs->setVar = Blur_SetVar;
265d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    funcs->root = Blur_uchar4;
266fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    funcs->destroy = Destroy;
267d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
268d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams));
269fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    if (!cp) {
270fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        return NULL;
271fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    }
272fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
273d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    cp->radius = 5;
274fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    cp->scratch = (void **)calloc(dc->mWorkers.mCount + 1, sizeof(void *));
275fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    cp->scratchSize = (size_t *)calloc(dc->mWorkers.mCount + 1, sizeof(size_t));
276fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    if (!cp->scratch || !cp->scratchSize) {
277fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        Destroy(rsc, script, cp);
278fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams        return NULL;
279fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams    }
280fb6dfa66fe274ae6562c00084869f8fadf8d197eJason Sams
281d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    ComputeGaussianWeights(cp);
282d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams    return cp;
283d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}
284d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
285d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams
286