1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18#include "rsdCore.h" 19#include "rsdIntrinsics.h" 20#include "rsdAllocation.h" 21 22#include "rsdIntrinsicInlines.h" 23 24using namespace android; 25using namespace android::renderscript; 26 27struct ConvolveParams { 28 float fp[104]; 29 short ip[104]; 30 float radius; 31 int iradius; 32 ObjectBaseRef<Allocation> alloc; 33}; 34 35static void ComputeGaussianWeights(ConvolveParams *cp) { 36 // Compute gaussian weights for the blur 37 // e is the euler's number 38 float e = 2.718281828459045f; 39 float pi = 3.1415926535897932f; 40 // g(x) = ( 1 / sqrt( 2 * pi ) * sigma) * e ^ ( -x^2 / 2 * sigma^2 ) 41 // x is of the form [-radius .. 0 .. radius] 42 // and sigma varies with radius. 43 // Based on some experimental radius values and sigma's 44 // we approximately fit sigma = f(radius) as 45 // sigma = radius * 0.4 + 0.6 46 // The larger the radius gets, the more our gaussian blur 47 // will resemble a box blur since with large sigma 48 // the gaussian curve begins to lose its shape 49 float sigma = 0.4f * cp->radius + 0.6f; 50 51 // Now compute the coefficients. We will store some redundant values to save 52 // some math during the blur calculations precompute some values 53 float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); 54 float coeff2 = - 1.0f / (2.0f * sigma * sigma); 55 56 float normalizeFactor = 0.0f; 57 float floatR = 0.0f; 58 int r; 59 cp->iradius = (float)ceil(cp->radius) + 0.5f; 60 for (r = -cp->iradius; r <= cp->iradius; r ++) { 61 floatR = (float)r; 62 cp->fp[r + cp->iradius] = coeff1 * powf(e, floatR * floatR * coeff2); 63 normalizeFactor += cp->fp[r + cp->iradius]; 64 } 65 66 //Now we need to normalize the weights because all our coefficients need to add up to one 67 normalizeFactor = 1.0f / normalizeFactor; 68 for (r = -cp->iradius; r <= cp->iradius; r ++) { 69 cp->fp[r + cp->iradius] *= normalizeFactor; 70 cp->ip[r + cp->iradius] = (short)(cp->ip[r + cp->iradius] * 32768); 71 } 72} 73 74static void Blur_Bind(const Context *dc, const Script *script, 75 void * intrinsicData, uint32_t slot, Allocation *data) { 76 ConvolveParams *cp = (ConvolveParams *)intrinsicData; 77 rsAssert(slot == 1); 78 cp->alloc.set(data); 79} 80 81static void Blur_SetVar(const Context *dc, const Script *script, void * intrinsicData, 82 uint32_t slot, void *data, size_t dataLength) { 83 ConvolveParams *cp = (ConvolveParams *)intrinsicData; 84 rsAssert(slot == 0); 85 86 cp->radius = ((const float *)data)[0]; 87 ComputeGaussianWeights(cp); 88} 89 90 91 92static void OneV(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y, 93 const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 94 95 const uchar *pi = ptrIn + x*4; 96 97 float4 blurredPixel = 0; 98 for (int r = -iradius; r <= iradius; r ++) { 99 int validY = rsMax((y + r), 0); 100 validY = rsMin(validY, (int)(p->dimY - 1)); 101 const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; 102 float4 pf = convert_float4(pvy[0]); 103 blurredPixel += pf * gPtr[0]; 104 gPtr++; 105 } 106 107 out->xyzw = blurredPixel; 108} 109 110extern "C" void rsdIntrinsicBlurVF_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int x2); 111extern "C" void rsdIntrinsicBlurHF_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int x2); 112 113static void OneVF(float4 *out, 114 const uchar *ptrIn, int iStride, const float* gPtr, int ct, 115 int x1, int x2) { 116 117#if defined(ARCH_ARM_HAVE_NEON) 118 { 119 int t = (x2 - x1); 120 t &= ~1; 121 if(t) { 122 rsdIntrinsicBlurVF_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); 123 } 124 x1 += t; 125 } 126#endif 127 128 while(x2 > x1) { 129 const uchar *pi = ptrIn + x1 * 4; 130 float4 blurredPixel = 0; 131 const float* gp = gPtr; 132 133 for (int r = 0; r < ct; r++) { 134 float4 pf = convert_float4(((const uchar4 *)pi)[0]); 135 blurredPixel += pf * gp[0]; 136 pi += iStride; 137 gp++; 138 } 139 out->xyzw = blurredPixel; 140 x1++; 141 out++; 142 } 143} 144 145static void OneH(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x, 146 const float4 *ptrIn, const float* gPtr, int iradius) { 147 148 float4 blurredPixel = 0; 149 for (int r = -iradius; r <= iradius; r ++) { 150 int validX = rsMax((x + r), 0); 151 validX = rsMin(validX, (int)(p->dimX - 1)); 152 float4 pf = ptrIn[validX]; 153 blurredPixel += pf * gPtr[0]; 154 gPtr++; 155 } 156 157 out->xyzw = convert_uchar4(blurredPixel); 158} 159 160 161static void Blur_uchar4(const RsForEachStubParamStruct *p, 162 uint32_t xstart, uint32_t xend, 163 uint32_t instep, uint32_t outstep) { 164 float buf[4 * 2048]; 165 ConvolveParams *cp = (ConvolveParams *)p->usr; 166 if (!cp->alloc.get()) { 167 ALOGE("Blur executed without input, skipping"); 168 return; 169 } 170 DrvAllocation *din = (DrvAllocation *)cp->alloc->mHal.drv; 171 const uchar *pin = (const uchar *)din->lod[0].mallocPtr; 172 173 uchar4 *out = (uchar4 *)p->out; 174 uint32_t x1 = xstart; 175 uint32_t x2 = xend; 176 177 float4 *fout = (float4 *)buf; 178 int y = p->y; 179 if ((y > cp->iradius) && (y < ((int)p->dimY - cp->iradius))) { 180 const uchar *pi = pin + (y - cp->iradius) * din->lod[0].stride; 181 OneVF(fout, pi, din->lod[0].stride, cp->fp, cp->iradius * 2 + 1, x1, x2); 182 } else { 183 while(x2 > x1) { 184 OneV(p, fout, x1, y, pin, din->lod[0].stride, cp->fp, cp->iradius); 185 fout++; 186 x1++; 187 } 188 } 189 190 x1 = xstart; 191 while ((x1 < (uint32_t)cp->iradius) && (x1 < x2)) { 192 OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); 193 out++; 194 x1++; 195 } 196#if defined(ARCH_ARM_HAVE_NEON) 197 if ((x1 + cp->iradius) < x2) { 198 rsdIntrinsicBlurHF_K(out, ((float4 *)buf) - cp->iradius, cp->fp, cp->iradius * 2 + 1, x1, x2 - cp->iradius); 199 out += (x2 - cp->iradius) - x1; 200 x1 = x2 - cp->iradius; 201 } 202#endif 203 while(x2 > x1) { 204 OneH(p, out, x1, (float4 *)buf, cp->fp, cp->iradius); 205 out++; 206 x1++; 207 } 208 209} 210 211void * rsdIntrinsic_InitBlur(const android::renderscript::Context *dc, 212 android::renderscript::Script *script, 213 RsdIntriniscFuncs_t *funcs) { 214 215 script->mHal.info.exportedVariableCount = 2; 216 funcs->setVarObj = Blur_Bind; 217 funcs->setVar = Blur_SetVar; 218 funcs->root = Blur_uchar4; 219 220 ConvolveParams *cp = (ConvolveParams *)calloc(1, sizeof(ConvolveParams)); 221 cp->radius = 5; 222 ComputeGaussianWeights(cp); 223 return cp; 224} 225 226 227