rsCpuIntrinsicBlur.cpp revision 8b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8f
1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/* 2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project 3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License. 6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at 7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * http://www.apache.org/licenses/LICENSE-2.0 9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software 11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and 14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License. 15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */ 16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 17709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsic.h" 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsicInlines.h" 19d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android; 21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript; 22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 23709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace android { 24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace renderscript { 25709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 27709a0978ae141198018ca9769f8d96292a8928e6Jason Samsclass RsdCpuScriptIntrinsicBlur : public RsdCpuScriptIntrinsic { 28709a0978ae141198018ca9769f8d96292a8928e6Jason Samspublic: 29709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void populateScript(Script *); 30709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void invokeFreeChildren(); 31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual ~RsdCpuScriptIntrinsicBlur(); 36c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 38709a0978ae141198018ca9769f8d96292a8928e6Jason Samsprotected: 39c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mFp[104]; 40446788007efe0a673d0366284026adfa17b36fedSimon Hosie uint16_t mIp[104]; 41c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams void **mScratch; 42c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams size_t *mScratchSize; 43c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mRadius; 44c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int mIradius; 45c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams ObjectBaseRef<Allocation> mAlloc; 46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 47c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams static void kernelU4(const RsForEachStubParamStruct *p, 48c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 49c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep); 50c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams static void kernelU1(const RsForEachStubParamStruct *p, 51c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 52c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep); 53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams void ComputeGaussianWeights(); 54d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}; 55d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 57709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 58709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 59709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 60709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::ComputeGaussianWeights() { 61c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mFp, 0, sizeof(mFp)); 62c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mIp, 0, sizeof(mIp)); 637079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 64d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Compute gaussian weights for the blur 65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // e is the euler's number 668b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // TODO Define these constants only once 67d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float e = 2.718281828459045f; 68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float pi = 3.1415926535897932f; 698b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // g(x) = (1 / (sqrt(2 * pi) * sigma)) * e ^ (-x^2 / (2 * sigma^2)) 70d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // x is of the form [-radius .. 0 .. radius] 718b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // and sigma varies with the radius. 728b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Based on some experimental radius values and sigmas, 73d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // we approximately fit sigma = f(radius) as 74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // sigma = radius * 0.4 + 0.6 75d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // The larger the radius gets, the more our gaussian blur 76d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // will resemble a box blur since with large sigma 77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // the gaussian curve begins to lose its shape 78c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float sigma = 0.4f * mRadius + 0.6f; 79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 80d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Now compute the coefficients. We will store some redundant values to save 81d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // some math during the blur calculations precompute some values 82d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); 83d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff2 = - 1.0f / (2.0f * sigma * sigma); 84d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float normalizeFactor = 0.0f; 86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float floatR = 0.0f; 87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int r; 88c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mIradius = (float)ceil(mRadius) + 0.5f; 89c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 90d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams floatR = (float)r; 91c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] = coeff1 * powf(e, floatR * floatR * coeff2); 92c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams normalizeFactor += mFp[r + mIradius]; 93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 958b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Now we need to normalize the weights because all our coefficients need to add up to one 96d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams normalizeFactor = 1.0f / normalizeFactor; 97c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 98c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] *= normalizeFactor; 99446788007efe0a673d0366284026adfa17b36fedSimon Hosie mIp[r + mIradius] = (uint16_t)(mFp[r + mIradius] * 65536.0f + 0.5f); 100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 103709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalObj(uint32_t slot, ObjectBase *data) { 104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 1); 105c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.set(static_cast<Allocation *>(data)); 106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 108709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 0); 110c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = ((const float *)data)[0]; 111709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 112d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 113d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 114d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 115d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 116c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVU4(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y, 117c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 118d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 119d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar *pi = ptrIn + x*4; 120d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 121d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 122d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 123d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validY = rsMax((y + r), 0); 124d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams validY = rsMin(validY, (int)(p->dimY - 1)); 125d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; 126d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = convert_float4(pvy[0]); 127d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 128d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 129d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 130d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 131d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out->xyzw = blurredPixel; 132d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 133d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 134c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVU1(const RsForEachStubParamStruct *p, float *out, int32_t x, int32_t y, 135c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 136e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 137c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn + x; 138c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 139c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 140c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 141c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validY = rsMax((y + r), 0); 142c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams validY = rsMin(validY, (int)(p->dimY - 1)); 143c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[validY * iStride]; 144c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 145c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 146c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 147c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 148c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 149c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 150c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 151446788007efe0a673d0366284026adfa17b36fedSimon Hosie 152446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU1_K(uchar *out, uchar const *in, size_t w, size_t h, 153446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 154446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU4_K(uchar4 *out, uchar4 const *in, size_t w, size_t h, 155446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 156c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 1577b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 1587b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurVFU4_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int ct); 1597b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurHFU4_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 1607b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurHFU1_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 1617b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 1627b7060c61e4182b29186849c5a857ea5f0898e56Rose, James 163c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU4(float4 *out, 164c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, 165c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int x1, int x2) { 1667b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 1677b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 1687b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1); 1697b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 1707b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 1717b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); 1727b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1737b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += t; 1747b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1757b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 176e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams while(x2 > x1) { 1772e5ef66f7d88e2f85bbc4f97edf1ed0b6edbaac7Tim Murray const uchar *pi = ptrIn; 178e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 blurredPixel = 0; 179e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams const float* gp = gPtr; 180e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 181e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams for (int r = 0; r < ct; r++) { 182e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 pf = convert_float4(((const uchar4 *)pi)[0]); 183e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams blurredPixel += pf * gp[0]; 184e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams pi += iStride; 185e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams gp++; 186e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 187e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out->xyzw = blurredPixel; 188e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 189e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 1900782188b07ceeca03a45b26873bec0ccfd412373Jason Sams ptrIn+=4; 191e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 192e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams} 193e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 194c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU1(float *out, 195c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, int x1, int x2) { 196c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 197c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int len = x2 - x1; 198c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 199099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray while((x2 > x1) && (((uintptr_t)ptrIn) & 0x3)) { 2007079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const uchar *pi = ptrIn; 2017079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float blurredPixel = 0; 2027079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const float* gp = gPtr; 2037079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 2047079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams for (int r = 0; r < ct; r++) { 2057079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float pf = (float)pi[0]; 2067079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams blurredPixel += pf * gp[0]; 2077079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams pi += iStride; 2087079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams gp++; 2097079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2107079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out[0] = blurredPixel; 211c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams x1++; 2127079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out++; 2137079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 214b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams len--; 2157079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2167b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 2177b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD && (x2 > x1)) { 2187b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1) >> 2; 2197b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 2207b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 2217b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t ); 2227b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len -= t << 2; 2237b7060c61e4182b29186849c5a857ea5f0898e56Rose, James ptrIn += t << 2; 2247b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += t << 2; 2257b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2267b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2277b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 228b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams while(len > 0) { 229c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn; 230c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 231c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float* gp = gPtr; 232c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 233c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = 0; r < ct; r++) { 234c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[0]; 235c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gp[0]; 236c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams pi += iStride; 237c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gp++; 238c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 239c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 240c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams len--; 241c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 2427079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 243c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 244c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 245c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 246c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneHU4(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x, 247c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float4 *ptrIn, const float* gPtr, int iradius) { 248d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 249d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 250d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 251d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validX = rsMax((x + r), 0); 252d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams validX = rsMin(validX, (int)(p->dimX - 1)); 253d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = ptrIn[validX]; 254d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 255d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 256d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 257d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 258d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out->xyzw = convert_uchar4(blurredPixel); 259d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 260d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 261c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneHU1(const RsForEachStubParamStruct *p, uchar *out, int32_t x, 262c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float *ptrIn, const float* gPtr, int iradius) { 263c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 264c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 265c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 266c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validX = rsMax((x + r), 0); 267c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams validX = rsMin(validX, (int)(p->dimX - 1)); 268c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = ptrIn[validX]; 269c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 270c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 271c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 272c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 273c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = (uchar)blurredPixel; 274c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 275c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 276d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 277c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsvoid RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p, 278c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 279c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep) { 280c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 2812913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 stackbuf[2048]; 2822913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 *buf = &stackbuf[0]; 283709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr; 284c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 285b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams ALOGE("Blur executed without input, skipping"); 286b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams return; 287b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams } 288c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 289c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 290d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 291d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uchar4 *out = (uchar4 *)p->out; 292d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = xstart; 293d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = xend; 294d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 295446788007efe0a673d0366284026adfa17b36fedSimon Hosie#if defined(ARCH_ARM_HAVE_VFP) 296446788007efe0a673d0366284026adfa17b36fedSimon Hosie if (gArchUseSIMD) { 297446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * p->y), p->dimX, p->dimY, 298446788007efe0a673d0366284026adfa17b36fedSimon Hosie stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 299446788007efe0a673d0366284026adfa17b36fedSimon Hosie return; 300446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 301446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 302446788007efe0a673d0366284026adfa17b36fedSimon Hosie 303c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (p->dimX > 2048) { 304c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) { 30575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // Pad the side of the allocation by one unit to allow alignment later 30675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16); 307c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams cp->mScratchSize[p->lid] = p->dimX; 308c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 30975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // realloc only aligns to 8 bytes so we manually align to 16. 31075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf); 311c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 312d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 *fout = (float4 *)buf; 313e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams int y = p->y; 314c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) { 315c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 316c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, x1, x2); 317e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } else { 318e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams while(x2 > x1) { 319c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVU4(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 320e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams fout++; 321e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 322e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 323d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 324d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 325d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1 = xstart; 326c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) { 3272913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius); 328e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 329e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 330e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 3317b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3327b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 3337b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 3347b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp, 3357b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x2 - cp->mIradius); 3367b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += (x2 - cp->mIradius) - x1; 3377b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 = x2 - cp->mIradius; 3387b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3397b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3407b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 341d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams while(x2 > x1) { 3422913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius); 343d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out++; 344d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1++; 345d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 346d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 347d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 348c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsvoid RsdCpuScriptIntrinsicBlur::kernelU1(const RsForEachStubParamStruct *p, 349c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 350c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep) { 351c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float buf[4 * 2048]; 352c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr; 353c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 354c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams ALOGE("Blur executed without input, skipping"); 355c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return; 356c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 357c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 358c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 359709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 360c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uchar *out = (uchar *)p->out; 361c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x1 = xstart; 362c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x2 = xend; 363c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 364446788007efe0a673d0366284026adfa17b36fedSimon Hosie#if defined(ARCH_ARM_HAVE_VFP) 365446788007efe0a673d0366284026adfa17b36fedSimon Hosie if (gArchUseSIMD) { 366446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsdIntrinsicBlurU1_K(out, pin + stride * p->y, p->dimX, p->dimY, 367446788007efe0a673d0366284026adfa17b36fedSimon Hosie stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 368446788007efe0a673d0366284026adfa17b36fedSimon Hosie return; 369446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 370446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 371446788007efe0a673d0366284026adfa17b36fedSimon Hosie 372c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float *fout = (float *)buf; 373c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int y = p->y; 374ce0351debba8dadd1a7af2b3e926de6d787b49afJason Sams if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) { 375c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 376c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, x1, x2); 377c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } else { 378c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams while(x2 > x1) { 379c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVU1(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 380c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams fout++; 381c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 382c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 383c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 384c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 385c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1 = xstart; 3867079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams while ((x1 < x2) && 387099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray ((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) { 388c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius); 389c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 390c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 391c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 3927b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3937b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 3947b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 3957b7060c61e4182b29186849c5a857ea5f0898e56Rose, James uint32_t len = x2 - (x1 + cp->mIradius); 3967b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len &= ~3; 3977b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (len > 0) { 3987b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU1_K(out, ((float *)buf) - cp->mIradius, cp->mFp, 3997b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x1 + len); 4007b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += len; 4017b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += len; 4027b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4037b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4047b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4057b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 406c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams while(x2 > x1) { 407c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius); 408c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 409c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 410c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 411c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 412c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 413c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptIntrinsicBlur::RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, 414c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const Script *s, const Element *e) 415c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) { 416c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 417c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = NULL; 418c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams if (e->getType() == RS_TYPE_UNSIGNED_8) { 419c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams switch (e->getVectorSize()) { 420c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 1: 421c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU1; 422c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 423c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 4: 424c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU4; 425c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 426c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 427c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 428c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams rsAssert(mRootPtr); 429c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = 5; 430c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 431c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratch = new void *[mCtx->getThreadCount()]; 432c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratchSize = new size_t[mCtx->getThreadCount()]; 43375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratch, 0, sizeof(void *) * mCtx->getThreadCount()); 43475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratchSize, 0, sizeof(size_t) * mCtx->getThreadCount()); 435c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 436709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 437709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 438709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 439709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicBlur::~RsdCpuScriptIntrinsicBlur() { 440c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams uint32_t threads = mCtx->getThreadCount(); 441c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch) { 442c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (size_t i = 0; i < threads; i++) { 443c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch[i]) { 444c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams free(mScratch[i]); 445c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 446c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 447c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratch; 448c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 449c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratchSize) { 450c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratchSize; 451c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 452709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 453709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 454709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::populateScript(Script *s) { 455709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->mHal.info.exportedVariableCount = 2; 456709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 457709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 458709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::invokeFreeChildren() { 459c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.clear(); 460709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 461709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 462d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 463c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 464d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 465c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return new RsdCpuScriptIntrinsicBlur(ctx, s, e); 466d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 467d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 468d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 469