1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsdCore.h" 19#include "rsdIntrinsics.h" 20#include "rsdAllocation.h" 21 22#include "rsdIntrinsicInlines.h" 23 24using namespace android; 25using namespace android::renderscript; 26 27struct ConvolveParams { 28 float fp[104]; 29 short ip[104]; 30 float radius; 31 int iradius; 32 void **scratch; 33 size_t *scratchSize; 34 ObjectBaseRef<Allocation> alloc; 35}; 36 37static void ComputeGaussianWeights(ConvolveParams *cp) { 38 // Compute gaussian weights for the blur 39 // e is the euler's number 40 float e = 2.718281828459045f; 41 float pi = 3.1415926535897932f; 42 // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 ) 43 // x is of the form [-radius .. 0 .. radius] 44 // and sigma varies with radius. 45 // Based on some experimental radius values and sigma's 46 // we approximately fit sigma = f(radius) as 47 // sigma = radius * 0.4 + 0.6 48 // The larger the radius gets, the more our gaussian blur 49 // will resemble a box blur since with large sigma 50 // the gaussian curve begins to lose its shape 51 float sigma = 0.4f * cp->radius + 0.6f; 52 53 // Now compute the coefficients. We will store some redundant values to save 54 // some math during the blur calculations precompute some values 55 float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); 56 float coeff2 = - 1.0f / (2.0f * sigma * sigma); 57 58 float normalizeFactor = 0.0f; 59 float floatR = 0.0f; 60 int r; 61 cp->iradius = (float)ceil(cp->radius) + 0.5f; 62 for (r = -cp->iradius; r <= cp->iradius; r ++) { 63 floatR = (float)r; 64 cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2); 65 normalizeFactor += cp->fp[r + cp->iradius]; 66 } 67 68 //Now we need to normalize the weights because all our coefficients need to add up to one 69 normalizeFactor = 1.0f / normalizeFactor; 70 for (r = -cp->iradius; r <= cp->iradius; r ++) { 71 cp->fp[r + cp->iradius] *= normalizeFactor; 72 cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768); 73 } 74} 75 76static void Blur_Bind(const Context *dc, const Script *script, 77 void * intrinsicData, uint32_t slot, Allocation *data) { 78 ConvolveParams *cp = (ConvolveParams *)intrinsicData; 79 rsAssert(slot == 1); 80 cp->alloc.set(data); 81} 82 83static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData, 84 uint32_t slot, void *data, size_t dataLength) { 85 ConvolveParams *cp = (ConvolveParams *)intrinsicData; 86 rsAssert(slot == 0); 87 88 cp->radius = ((const float *)data)[0]; 89 ComputeGaussianWeights(cp); 90} 91 92 93 94static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y, 95 const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 96 97 const uchar *pi = ptrIn + x*4; 98 99 float4 blurredPixel = 0; 100 for (int r = -iradius; r <= iradius; r ++) { 101 int validY = rsMax((y + r), 0); 102 validY = rsMin(validY, (int)(p->dimY - 1)); 103 const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; 104 float4 pf = convert_float4(pvy[0]); 105 blurredPixel += pf * gPtr[0]; 106 gPtr++; 107 } 108 109 out->xyzw = blurredPixel; 110} 111 112extern "C" void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2); 113extern "C" void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2); 114 115static void OneVF(float4 *out, 116 const uchar *ptrIn, int iStride, const float* gPtr, int ct, 117 int x1, int x2) { 118 119#if defined(ARCH_ARM_HAVE_NEON) 120 { 121 int t = (x2 - x1); 122 t &= ~1; 123 if(t) { 124 rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); 125 } 126 x1 += t; 127 } 128#endif 129 130 while(x2 > x1) { 131 const uchar *pi = ptrIn + x1 * 4; 132 float4 blurredPixel = 0; 133 const float* gp = gPtr; 134 135 for (int r = 0; r < ct; r++) { 136 float4 pf = convert_float4(((const uchar4 *)pi)[0]); 137 blurredPixel += pf * gp[0]; 138 pi += iStride; 139 gp++; 140 } 141 out->xyzw = blurredPixel; 142 x1++; 143 out++; 144 gPtr++; 145 } 146} 147 148static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x, 149 const float4 *ptrIn, const float* gPtr, int iradius) { 150 151 float4 blurredPixel = 0; 152 for (int r = -iradius; r <= iradius; r ++) { 153 int validX = rsMax((x + r), 0); 154 validX = rsMin(validX, (int)(p->dimX - 1)); 155 float4 pf = ptrIn[validX]; 156 blurredPixel += pf * gPtr[0]; 157 gPtr++; 158 } 159 160 out->xyzw = convert_uchar4(blurredPixel); 161} 162 163 164static void Blur_uchar4(const RsForEachStubParamStruct *p, 165 uint32_t xstart, uint32_t xend, 166 uint32_t instep, uint32_t outstep) { 167 float stackbuf[4 * 2048]; 168 float *buf = &stackbuf[0]; 169 ConvolveParams *cp = (ConvolveParams *)p->usr; 170 if (!cp->alloc.get()) { 171 ALOGE("Blur executed without input, skipping"); 172 return; 173 } 174 DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; 175 const uchar *pin = (const uchar *)din->lod[0].mallocPtr; 176 177 uchar4 *out = (uchar4 *)p->out; 178 uint32_t x1 = xstart; 179 uint32_t x2 = xend; 180 181 if (p->dimX > 2048) { 182 if ((p->dimX > cp->scratchSize[p->lid]) || !cp->scratch[p->lid]) { 183 cp->scratch[p->lid] = realloc(cp->scratch[p->lid], p->dimX * 16); 184 cp->scratchSize[p->lid] = p->dimX; 185 } 186 buf = (float *)cp->scratch[p->lid]; 187 } 188 float4 *fout = (float4 *)buf; 189 190 int y = p->y; 191 uint32_t vx1 = x1; 192 uint32_t vx2 = x2; 193 194 if (vx1 > (uint32_t)cp->iradius) { 195 vx1 -= cp->iradius; 196 } else { 197 vx1 = 0; 198 } 199 vx2 += cp->iradius; 200 if (vx2 >= p->dimX) { 201 vx2 = p->dimX - 1; 202 } 203 204 if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) { 205 const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride; 206 OneVF(fout + vx1, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, vx1, vx2); 207 } else { 208 while(vx2 > vx1) { 209 OneV(p, fout, vx1, y, pin, din->lod[0].stride, cp->fp, cp->iradius); 210 fout++; 211 vx1++; 212 } 213 } 214 215 x1 = xstart; 216 while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) { 217 OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); 218 out++; 219 x1++; 220 } 221#if defined(ARCH_ARM_HAVE_NEON) 222 if ((x1 + cp->iradius) < x2) { 223 rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius); 224 out += (x2 - cp->iradius) - x1; 225 x1 = x2 - cp->iradius; 226 } 227#endif 228 while(x2 > x1) { 229 OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); 230 out++; 231 x1++; 232 } 233 234} 235 236static void Destroy(const Context *rsc, const Script *script, void * intrinsicData) { 237 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 238 ConvolveParams *cp = (ConvolveParams *)intrinsicData; 239 240 if (cp) { 241 if (cp->scratch) { 242 for (size_t i = 0; i < dc->mWorkers.mCount + 1; i++) { 243 if (cp->scratch[i]) { 244 free(cp->scratch[i]); 245 } 246 } 247 free(cp->scratch); 248 } 249 if (cp->scratchSize) { 250 free(cp->scratchSize); 251 } 252 free(cp); 253 } 254} 255 256void * rsdIntrinsic_InitBlur(const android::renderscript::Context *rsc, 257 android::renderscript::Script *script, 258 RsdIntriniscFuncs_t *funcs) { 259 260 RsdHal * dc = (RsdHal *)rsc->mHal.drv; 261 262 script->mHal.info.exportedVariableCount = 2; 263 funcs->setVarObj = Blur_Bind; 264 funcs->setVar = Blur_SetVar; 265 funcs->root = Blur_uchar4; 266 funcs->destroy = Destroy; 267 268 ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); 269 if (!cp) { 270 return NULL; 271 } 272 273 cp->radius = 5; 274 cp->scratch = (void **)calloc(dc->mWorkers.mCount + 1, sizeof(void *)); 275 cp->scratchSize = (size_t *)calloc(dc->mWorkers.mCount + 1, sizeof(size_t)); 276 if (!cp->scratch || !cp->scratchSize) { 277 Destroy(rsc, script, cp); 278 return NULL; 279 } 280 281 ComputeGaussianWeights(cp); 282 return cp; 283} 284 285 286