1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/* 2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project 3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License. 6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at 7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * http://www.apache.org/licenses/LICENSE-2.0 9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software 11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and 14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License. 15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */ 16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 17709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsic.h" 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsicInlines.h" 19d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android; 21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript; 22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 23709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace android { 24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace renderscript { 25709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 27709a0978ae141198018ca9769f8d96292a8928e6Jason Samsclass RsdCpuScriptIntrinsicBlur : public RsdCpuScriptIntrinsic { 28709a0978ae141198018ca9769f8d96292a8928e6Jason Samspublic: 29709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void populateScript(Script *); 30709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void invokeFreeChildren(); 31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual ~RsdCpuScriptIntrinsicBlur(); 36c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 38709a0978ae141198018ca9769f8d96292a8928e6Jason Samsprotected: 39c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mFp[104]; 40446788007efe0a673d0366284026adfa17b36fedSimon Hosie uint16_t mIp[104]; 41c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams void **mScratch; 42c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams size_t *mScratchSize; 43c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mRadius; 44c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int mIradius; 45c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams ObjectBaseRef<Allocation> mAlloc; 46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 47c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams static void kernelU4(const RsForEachStubParamStruct *p, 48c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 49c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep); 50c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams static void kernelU1(const RsForEachStubParamStruct *p, 51c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 52c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep); 53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams void ComputeGaussianWeights(); 54d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}; 55d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 57709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 58709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 59709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 60709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::ComputeGaussianWeights() { 61c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mFp, 0, sizeof(mFp)); 62c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mIp, 0, sizeof(mIp)); 637079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 64d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Compute gaussian weights for the blur 65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // e is the euler's number 668b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // TODO Define these constants only once 67d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float e = 2.718281828459045f; 68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float pi = 3.1415926535897932f; 698b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // g(x) = (1 / (sqrt(2 * pi) * sigma)) * e ^ (-x^2 / (2 * sigma^2)) 70d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // x is of the form [-radius .. 0 .. radius] 718b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // and sigma varies with the radius. 728b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Based on some experimental radius values and sigmas, 73d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // we approximately fit sigma = f(radius) as 74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // sigma = radius * 0.4 + 0.6 75d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // The larger the radius gets, the more our gaussian blur 76d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // will resemble a box blur since with large sigma 77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // the gaussian curve begins to lose its shape 78c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float sigma = 0.4f * mRadius + 0.6f; 79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 80d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Now compute the coefficients. We will store some redundant values to save 81d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // some math during the blur calculations precompute some values 82d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); 83d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff2 = - 1.0f / (2.0f * sigma * sigma); 84d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float normalizeFactor = 0.0f; 86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float floatR = 0.0f; 87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int r; 88c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mIradius = (float)ceil(mRadius) + 0.5f; 89c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 90d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams floatR = (float)r; 91c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] = coeff1 * powf(e, floatR * floatR * coeff2); 92c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams normalizeFactor += mFp[r + mIradius]; 93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 958b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Now we need to normalize the weights because all our coefficients need to add up to one 96d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams normalizeFactor = 1.0f / normalizeFactor; 97c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 98c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] *= normalizeFactor; 99446788007efe0a673d0366284026adfa17b36fedSimon Hosie mIp[r + mIradius] = (uint16_t)(mFp[r + mIradius] * 65536.0f + 0.5f); 100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 103709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalObj(uint32_t slot, ObjectBase *data) { 104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 1); 105c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.set(static_cast<Allocation *>(data)); 106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 108709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 0); 110c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = ((const float *)data)[0]; 111709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 112d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 113d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 114d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 115d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 116c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVU4(const RsForEachStubParamStruct *p, float4 *out, int32_t x, int32_t y, 117c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 118d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 119d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar *pi = ptrIn + x*4; 120d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 121d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 122d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 123d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validY = rsMax((y + r), 0); 124d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams validY = rsMin(validY, (int)(p->dimY - 1)); 125d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; 126d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = convert_float4(pvy[0]); 127d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 128d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 129d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 130d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 131d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out[0] = blurredPixel; 132d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 133d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 134c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVU1(const RsForEachStubParamStruct *p, float *out, int32_t x, int32_t y, 135c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 136e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 137c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn + x; 138c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 139c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 140c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 141c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validY = rsMax((y + r), 0); 142c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams validY = rsMin(validY, (int)(p->dimY - 1)); 143c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[validY * iStride]; 144c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 145c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 146c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 147c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 148c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 149c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 150c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 151446788007efe0a673d0366284026adfa17b36fedSimon Hosie 152446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU1_K(uchar *out, uchar const *in, size_t w, size_t h, 153446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 154446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU4_K(uchar4 *out, uchar4 const *in, size_t w, size_t h, 155446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 156c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 1577b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 1587b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurVFU4_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int ct); 1597b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurHFU4_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 1607b7060c61e4182b29186849c5a857ea5f0898e56Rose, Jamesextern "C" void rsdIntrinsicBlurHFU1_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 1617b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 1627b7060c61e4182b29186849c5a857ea5f0898e56Rose, James 163c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU4(float4 *out, 164c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, 165c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int x1, int x2) { 166d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out += x1; 1677b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 1687b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 1697b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1); 1707b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 1717b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 1727b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); 1737b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1747b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += t; 17533164686a7ac88d4eda38201be4127937e9c12b0Yong Chen out += t; 17633164686a7ac88d4eda38201be4127937e9c12b0Yong Chen ptrIn += t << 2; 1777b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1787b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 179e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams while(x2 > x1) { 1802e5ef66f7d88e2f85bbc4f97edf1ed0b6edbaac7Tim Murray const uchar *pi = ptrIn; 181e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 blurredPixel = 0; 182e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams const float* gp = gPtr; 183e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 184e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams for (int r = 0; r < ct; r++) { 185e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 pf = convert_float4(((const uchar4 *)pi)[0]); 186e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams blurredPixel += pf * gp[0]; 187e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams pi += iStride; 188e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams gp++; 189e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 190e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out->xyzw = blurredPixel; 191e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 192e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 1930782188b07ceeca03a45b26873bec0ccfd412373Jason Sams ptrIn+=4; 194e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 195e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams} 196e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 197c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU1(float *out, 198c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, int x1, int x2) { 199c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 200c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int len = x2 - x1; 201d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out += x1; 202c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 203099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray while((x2 > x1) && (((uintptr_t)ptrIn) & 0x3)) { 2047079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const uchar *pi = ptrIn; 2057079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float blurredPixel = 0; 2067079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const float* gp = gPtr; 2077079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 2087079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams for (int r = 0; r < ct; r++) { 2097079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float pf = (float)pi[0]; 2107079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams blurredPixel += pf * gp[0]; 2117079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams pi += iStride; 2127079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams gp++; 2137079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2147079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out[0] = blurredPixel; 215c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams x1++; 2167079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out++; 2177079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 218b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams len--; 2197079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2207b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 2217b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD && (x2 > x1)) { 2227b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1) >> 2; 2237b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 2247b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 2257b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t ); 2267b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len -= t << 2; 2277b7060c61e4182b29186849c5a857ea5f0898e56Rose, James ptrIn += t << 2; 2287b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += t << 2; 2297b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2307b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2317b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 232b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams while(len > 0) { 233c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn; 234c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 235c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float* gp = gPtr; 236c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 237c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = 0; r < ct; r++) { 238c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[0]; 239c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gp[0]; 240c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams pi += iStride; 241c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gp++; 242c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 243c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 244c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams len--; 245c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 2467079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 247c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 248c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 249c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 250c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneHU4(const RsForEachStubParamStruct *p, uchar4 *out, int32_t x, 251c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float4 *ptrIn, const float* gPtr, int iradius) { 252d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 253d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 254d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 255d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validX = rsMax((x + r), 0); 256d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams validX = rsMin(validX, (int)(p->dimX - 1)); 257d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = ptrIn[validX]; 258d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 259d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 260d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 261d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 262d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out->xyzw = convert_uchar4(blurredPixel); 263d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 264d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 265c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneHU1(const RsForEachStubParamStruct *p, uchar *out, int32_t x, 266c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float *ptrIn, const float* gPtr, int iradius) { 267c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 268c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 269c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 270c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validX = rsMax((x + r), 0); 271c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams validX = rsMin(validX, (int)(p->dimX - 1)); 272c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = ptrIn[validX]; 273c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 274c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 275c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 276c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 277c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = (uchar)blurredPixel; 278c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 279c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 280d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 281c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsvoid RsdCpuScriptIntrinsicBlur::kernelU4(const RsForEachStubParamStruct *p, 282c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 283c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep) { 284c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 2852913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 stackbuf[2048]; 2862913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 *buf = &stackbuf[0]; 287709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr; 288c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 289b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams ALOGE("Blur executed without input, skipping"); 290b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams return; 291b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams } 292c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 293c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 294d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 295d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uchar4 *out = (uchar4 *)p->out; 296d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = xstart; 297d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = xend; 298d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 299074424a4ac5b093331df2c92e7a5bcbfff136b71Jason Sams#if defined(ARCH_ARM_USE_INTRINSICS) 3004c8f2477285848ab0a4f33ad854de9398d332e8cJason Sams if (gArchUseSIMD) { 301446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * p->y), p->dimX, p->dimY, 302446788007efe0a673d0366284026adfa17b36fedSimon Hosie stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 303446788007efe0a673d0366284026adfa17b36fedSimon Hosie return; 304446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 305446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 306446788007efe0a673d0366284026adfa17b36fedSimon Hosie 307c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (p->dimX > 2048) { 308c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if ((p->dimX > cp->mScratchSize[p->lid]) || !cp->mScratch[p->lid]) { 30975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // Pad the side of the allocation by one unit to allow alignment later 31075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams cp->mScratch[p->lid] = realloc(cp->mScratch[p->lid], (p->dimX + 1) * 16); 311c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams cp->mScratchSize[p->lid] = p->dimX; 312c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 31375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // realloc only aligns to 8 bytes so we manually align to 16. 31475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams buf = (float4 *) ((((intptr_t)cp->mScratch[p->lid]) + 15) & ~0xf); 315c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 316d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 *fout = (float4 *)buf; 317e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams int y = p->y; 318c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius))) { 319c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 320d25fef7232a939faaffcdb83a1be28285313c38eJason Sams OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX); 321e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } else { 322d25fef7232a939faaffcdb83a1be28285313c38eJason Sams x1 = 0; 323d25fef7232a939faaffcdb83a1be28285313c38eJason Sams while(p->dimX > x1) { 324c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVU4(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 325e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams fout++; 326e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 327e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 328d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 329d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 330d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1 = xstart; 331c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) { 3322913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius); 333e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 334e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 335e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 3367b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3377b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 3387b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 3397b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp, 3407b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x2 - cp->mIradius); 3417b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += (x2 - cp->mIradius) - x1; 3427b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 = x2 - cp->mIradius; 3437b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3447b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3457b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 346d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams while(x2 > x1) { 3472913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines OneHU4(p, out, x1, buf, cp->mFp, cp->mIradius); 348d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out++; 349d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1++; 350d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 351d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 352d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 353c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsvoid RsdCpuScriptIntrinsicBlur::kernelU1(const RsForEachStubParamStruct *p, 354c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 355c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t instep, uint32_t outstep) { 356c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float buf[4 * 2048]; 357c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr; 358c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 359c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams ALOGE("Blur executed without input, skipping"); 360c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return; 361c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 362c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 363c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 364709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 365c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uchar *out = (uchar *)p->out; 366c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x1 = xstart; 367c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x2 = xend; 368c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 369074424a4ac5b093331df2c92e7a5bcbfff136b71Jason Sams#if defined(ARCH_ARM_USE_INTRINSICS) 3704c8f2477285848ab0a4f33ad854de9398d332e8cJason Sams if (gArchUseSIMD) { 371446788007efe0a673d0366284026adfa17b36fedSimon Hosie rsdIntrinsicBlurU1_K(out, pin + stride * p->y, p->dimX, p->dimY, 3724c8f2477285848ab0a4f33ad854de9398d332e8cJason Sams stride, x1, p->y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 373446788007efe0a673d0366284026adfa17b36fedSimon Hosie return; 374446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 375446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 376446788007efe0a673d0366284026adfa17b36fedSimon Hosie 377c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float *fout = (float *)buf; 378c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int y = p->y; 379ce0351debba8dadd1a7af2b3e926de6d787b49afJason Sams if ((y > cp->mIradius) && (y < ((int)p->dimY - cp->mIradius -1))) { 380c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 381d25fef7232a939faaffcdb83a1be28285313c38eJason Sams OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, p->dimX); 382c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } else { 383d25fef7232a939faaffcdb83a1be28285313c38eJason Sams x1 = 0; 384d25fef7232a939faaffcdb83a1be28285313c38eJason Sams while(p->dimX > x1) { 385c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneVU1(p, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 386c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams fout++; 387c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 388c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 389c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 390c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 391c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1 = xstart; 3927079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams while ((x1 < x2) && 393099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray ((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) { 394c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius); 395c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 396c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 397c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 3987b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3997b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 4007b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 4017b7060c61e4182b29186849c5a857ea5f0898e56Rose, James uint32_t len = x2 - (x1 + cp->mIradius); 4027b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len &= ~3; 4037b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (len > 0) { 4047b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU1_K(out, ((float *)buf) - cp->mIradius, cp->mFp, 4057b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x1 + len); 4067b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += len; 4077b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += len; 4087b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4097b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4107b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4117b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 412c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams while(x2 > x1) { 413c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams OneHU1(p, out, x1, buf, cp->mFp, cp->mIradius); 414c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 415c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 416c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 417c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 418c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 419c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptIntrinsicBlur::RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, 420c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const Script *s, const Element *e) 421c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) { 422c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 423c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = NULL; 424c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams if (e->getType() == RS_TYPE_UNSIGNED_8) { 425c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams switch (e->getVectorSize()) { 426c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 1: 427c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU1; 428c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 429c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 4: 430c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU4; 431c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 432c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 433c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 434c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams rsAssert(mRootPtr); 435c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = 5; 436c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 437c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratch = new void *[mCtx->getThreadCount()]; 438c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratchSize = new size_t[mCtx->getThreadCount()]; 43975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratch, 0, sizeof(void *) * mCtx->getThreadCount()); 44075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratchSize, 0, sizeof(size_t) * mCtx->getThreadCount()); 441c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 442709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 443709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 444709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 445709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicBlur::~RsdCpuScriptIntrinsicBlur() { 446c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams uint32_t threads = mCtx->getThreadCount(); 447c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch) { 448c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (size_t i = 0; i < threads; i++) { 449c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch[i]) { 450c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams free(mScratch[i]); 451c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 452c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 453c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratch; 454c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 455c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratchSize) { 456c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratchSize; 457c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 458709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 459709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 460709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::populateScript(Script *s) { 461709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->mHal.info.exportedVariableCount = 2; 462709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 463709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 464709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::invokeFreeChildren() { 465c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.clear(); 466709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 467709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 468d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 469c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 470d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 471c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return new RsdCpuScriptIntrinsicBlur(ctx, s, e); 472d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 473d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 474d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 475