1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/* 2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project 3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License. 6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at 7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * http://www.apache.org/licenses/LICENSE-2.0 9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software 11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and 14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License. 15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */ 16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 17709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsic.h" 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsicInlines.h" 19d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android; 21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript; 22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 23709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace android { 24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace renderscript { 25709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 27709a0978ae141198018ca9769f8d96292a8928e6Jason Samsclass RsdCpuScriptIntrinsicBlur : public RsdCpuScriptIntrinsic { 28709a0978ae141198018ca9769f8d96292a8928e6Jason Samspublic: 29c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void populateScript(Script *) override; 30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void invokeFreeChildren() override; 31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override; 33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalObj(uint32_t slot, ObjectBase *data) override; 34709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines ~RsdCpuScriptIntrinsicBlur() override; 36c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 38709a0978ae141198018ca9769f8d96292a8928e6Jason Samsprotected: 39c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mFp[104]; 40446788007efe0a673d0366284026adfa17b36fedSimon Hosie uint16_t mIp[104]; 41c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams void **mScratch; 42c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams size_t *mScratchSize; 43c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float mRadius; 44c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int mIradius; 45c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams ObjectBaseRef<Allocation> mAlloc; 46709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 47b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelU4(const RsExpandKernelDriverInfo *info, 48c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 499ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 50b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelU1(const RsExpandKernelDriverInfo *info, 51c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 529ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 53709a0978ae141198018ca9769f8d96292a8928e6Jason Sams void ComputeGaussianWeights(); 54d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}; 55d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 56709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 57709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 58709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 59709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 60709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::ComputeGaussianWeights() { 61c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mFp, 0, sizeof(mFp)); 62c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams memset(mIp, 0, sizeof(mIp)); 637079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 64d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Compute gaussian weights for the blur 65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // e is the euler's number 668b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // TODO Define these constants only once 67d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float e = 2.718281828459045f; 68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float pi = 3.1415926535897932f; 698b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // g(x) = (1 / (sqrt(2 * pi) * sigma)) * e ^ (-x^2 / (2 * sigma^2)) 70d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // x is of the form [-radius .. 0 .. radius] 718b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // and sigma varies with the radius. 728b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Based on some experimental radius values and sigmas, 73d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // we approximately fit sigma = f(radius) as 74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // sigma = radius * 0.4 + 0.6 75d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // The larger the radius gets, the more our gaussian blur 76d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // will resemble a box blur since with large sigma 77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // the gaussian curve begins to lose its shape 78c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams float sigma = 0.4f * mRadius + 0.6f; 79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 80d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // Now compute the coefficients. We will store some redundant values to save 81d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams // some math during the blur calculations precompute some values 82d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff1 = 1.0f / (sqrtf(2.0f * pi) * sigma); 83d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float coeff2 = - 1.0f / (2.0f * sigma * sigma); 84d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float normalizeFactor = 0.0f; 86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float floatR = 0.0f; 87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int r; 88c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mIradius = (float)ceil(mRadius) + 0.5f; 89c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 90d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams floatR = (float)r; 91c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] = coeff1 * powf(e, floatR * floatR * coeff2); 92c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams normalizeFactor += mFp[r + mIradius]; 93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 958b7117dfbcca8a74dfd83aaff7f82d2d7e3abd8fJean-Luc Brouillet // Now we need to normalize the weights because all our coefficients need to add up to one 96d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams normalizeFactor = 1.0f / normalizeFactor; 97c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (r = -mIradius; r <= mIradius; r ++) { 98c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mFp[r + mIradius] *= normalizeFactor; 99446788007efe0a673d0366284026adfa17b36fedSimon Hosie mIp[r + mIradius] = (uint16_t)(mFp[r + mIradius] * 65536.0f + 0.5f); 100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 103709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalObj(uint32_t slot, ObjectBase *data) { 104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 1); 105c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.set(static_cast<Allocation *>(data)); 106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 108709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 0); 110c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = ((const float *)data)[0]; 111709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 112d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 113d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 114d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 115d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 116b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneVU4(const RsExpandKernelDriverInfo *info, float4 *out, int32_t x, int32_t y, 117c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 118d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 119d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar *pi = ptrIn + x*4; 120d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 121d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 122d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 123d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validY = rsMax((y + r), 0); 124b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross validY = rsMin(validY, (int)(info->dim.y- 1)); 125d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams const uchar4 *pvy = (const uchar4 *)&pi[validY * iStride]; 126d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = convert_float4(pvy[0]); 127d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 128d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 129d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 130d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 131d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out[0] = blurredPixel; 132d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 133d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 134b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneVU1(const RsExpandKernelDriverInfo *info, float *out, int32_t x, int32_t y, 135c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int iradius) { 136e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 137c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn + x; 138c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 139c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 140c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 141c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validY = rsMax((y + r), 0); 142b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross validY = rsMin(validY, (int)(info->dim.y - 1)); 143c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[validY * iStride]; 144c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 145c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 146c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 147c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 148c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 149c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 150c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 151446788007efe0a673d0366284026adfa17b36fedSimon Hosie 152446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU1_K(uchar *out, uchar const *in, size_t w, size_t h, 153446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 154446788007efe0a673d0366284026adfa17b36fedSimon Hosieextern "C" void rsdIntrinsicBlurU4_K(uchar4 *out, uchar4 const *in, size_t w, size_t h, 155446788007efe0a673d0366284026adfa17b36fedSimon Hosie size_t p, size_t x, size_t y, size_t count, size_t r, uint16_t const *tab); 156c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 1577b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 158ebf0eb95cba9579af7cb67205b94b286f221c4edDan Albertextern void rsdIntrinsicBlurVFU4_K(void *dst, const void *pin, int stride, const void *gptr, int rct, int x1, int ct); 159ebf0eb95cba9579af7cb67205b94b286f221c4edDan Albertextern void rsdIntrinsicBlurHFU4_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 160ebf0eb95cba9579af7cb67205b94b286f221c4edDan Albertextern void rsdIntrinsicBlurHFU1_K(void *dst, const void *pin, const void *gptr, int rct, int x1, int ct); 1617b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 1627b7060c61e4182b29186849c5a857ea5f0898e56Rose, James 163c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU4(float4 *out, 164c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, 165c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int x1, int x2) { 166d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out += x1; 1677b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 1687b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 1697b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1); 1707b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 1717b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 1727b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, x1, x1 + t); 1737b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1747b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += t; 175a0cdfe05905a5a4d2e494665809c8af9d040c116Yong Chen out += t; 176a0cdfe05905a5a4d2e494665809c8af9d040c116Yong Chen ptrIn += t << 2; 1777b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 1787b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 179e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams while(x2 > x1) { 1802e5ef66f7d88e2f85bbc4f97edf1ed0b6edbaac7Tim Murray const uchar *pi = ptrIn; 181e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 blurredPixel = 0; 182e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams const float* gp = gPtr; 183e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 184e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams for (int r = 0; r < ct; r++) { 185e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams float4 pf = convert_float4(((const uchar4 *)pi)[0]); 186e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams blurredPixel += pf * gp[0]; 187e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams pi += iStride; 188e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams gp++; 189e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 190e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out->xyzw = blurredPixel; 191e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 192e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 1930782188b07ceeca03a45b26873bec0ccfd412373Jason Sams ptrIn+=4; 194e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 195e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams} 196e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams 197c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Samsstatic void OneVFU1(float *out, 198c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *ptrIn, int iStride, const float* gPtr, int ct, int x1, int x2) { 199c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 200c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams int len = x2 - x1; 201d25fef7232a939faaffcdb83a1be28285313c38eJason Sams out += x1; 202c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 203099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray while((x2 > x1) && (((uintptr_t)ptrIn) & 0x3)) { 2047079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const uchar *pi = ptrIn; 2057079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float blurredPixel = 0; 2067079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams const float* gp = gPtr; 2077079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams 2087079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams for (int r = 0; r < ct; r++) { 2097079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams float pf = (float)pi[0]; 2107079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams blurredPixel += pf * gp[0]; 2117079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams pi += iStride; 2127079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams gp++; 2137079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2147079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out[0] = blurredPixel; 215c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams x1++; 2167079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams out++; 2177079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 218b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams len--; 2197079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams } 2207b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 2217b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD && (x2 > x1)) { 2227b7060c61e4182b29186849c5a857ea5f0898e56Rose, James int t = (x2 - x1) >> 2; 2237b7060c61e4182b29186849c5a857ea5f0898e56Rose, James t &= ~1; 2247b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (t) { 2257b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurVFU4_K(out, ptrIn, iStride, gPtr, ct, 0, t ); 2267b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len -= t << 2; 2277b7060c61e4182b29186849c5a857ea5f0898e56Rose, James ptrIn += t << 2; 2287b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += t << 2; 2297b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2307b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 2317b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 232b6d2d2a6bc1cee3e04b7e632a96134bdb56f38eeJason Sams while(len > 0) { 233c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const uchar *pi = ptrIn; 234c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 235c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float* gp = gPtr; 236c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 237c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = 0; r < ct; r++) { 238c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = (float)pi[0]; 239c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gp[0]; 240c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams pi += iStride; 241c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gp++; 242c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 243c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = blurredPixel; 244c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams len--; 245c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 2467079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams ptrIn++; 247c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 248c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 249c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 250b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneHU4(const RsExpandKernelDriverInfo *info, uchar4 *out, int32_t x, 251c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float4 *ptrIn, const float* gPtr, int iradius) { 252d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 253d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 blurredPixel = 0; 254d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for (int r = -iradius; r <= iradius; r ++) { 255d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams int validX = rsMax((x + r), 0); 256b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross validX = rsMin(validX, (int)(info->dim.x - 1)); 257d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 pf = ptrIn[validX]; 258d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams blurredPixel += pf * gPtr[0]; 259d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams gPtr++; 260d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 261d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 262d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out->xyzw = convert_uchar4(blurredPixel); 263d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 264d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 265b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneHU1(const RsExpandKernelDriverInfo *info, uchar *out, int32_t x, 266c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const float *ptrIn, const float* gPtr, int iradius) { 267c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 268c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float blurredPixel = 0; 269c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams for (int r = -iradius; r <= iradius; r ++) { 270c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams int validX = rsMax((x + r), 0); 271b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross validX = rsMin(validX, (int)(info->dim.x - 1)); 272c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float pf = ptrIn[validX]; 273c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams blurredPixel += pf * gPtr[0]; 274c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams gPtr++; 275c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 276c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 277c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out[0] = (uchar)blurredPixel; 278c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 279c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 280d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 281b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelDriverInfo *info, 282c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 2839ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 284c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 2852913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 stackbuf[2048]; 2862913f381a554c28abb44f49eddd1ee4c68a72578Stephen Hines float4 *buf = &stackbuf[0]; 287b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr; 288c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 289b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams ALOGE("Blur executed without input, skipping"); 290b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams return; 291b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams } 292c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 293c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 294d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 295b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar4 *out = (uchar4 *)info->outPtr[0]; 296d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = xstart; 297d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = xend; 298d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 299074424a4ac5b093331df2c92e7a5bcbfff136b71Jason Sams#if defined(ARCH_ARM_USE_INTRINSICS) 300d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie if (gArchUseSIMD && info->dim.x >= 4) { 301b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross rsdIntrinsicBlurU4_K(out, (uchar4 const *)(pin + stride * info->current.y), 302b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross info->dim.x, info->dim.y, 303b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 304446788007efe0a673d0366284026adfa17b36fedSimon Hosie return; 305446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 306446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 307446788007efe0a673d0366284026adfa17b36fedSimon Hosie 308b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if (info->dim.x > 2048) { 309b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if ((info->dim.x > cp->mScratchSize[info->lid]) || !cp->mScratch[info->lid]) { 31075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // Pad the side of the allocation by one unit to allow alignment later 311b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cp->mScratch[info->lid] = realloc(cp->mScratch[info->lid], (info->dim.x + 1) * 16); 312b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross cp->mScratchSize[info->lid] = info->dim.x; 313c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 31475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams // realloc only aligns to 8 bytes so we manually align to 16. 315b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross buf = (float4 *) ((((intptr_t)cp->mScratch[info->lid]) + 15) & ~0xf); 316c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 317d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 *fout = (float4 *)buf; 318b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int y = info->current.y; 319b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius))) { 320c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 321b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneVFU4(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x); 322e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } else { 323d25fef7232a939faaffcdb83a1be28285313c38eJason Sams x1 = 0; 324b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross while(info->dim.x > x1) { 325b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneVU4(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 326e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams fout++; 327e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 328e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 329d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 330d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 331d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1 = xstart; 332c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams while ((x1 < (uint32_t)cp->mIradius) && (x1 < x2)) { 333b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius); 334e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams out++; 335e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams x1++; 336e78e514f3f209e594767e8ebc64f5df4be5b0b41Jason Sams } 3377b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3387b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 3397b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 3407b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU4_K(out, buf - cp->mIradius, cp->mFp, 3417b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x2 - cp->mIradius); 3427b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += (x2 - cp->mIradius) - x1; 3437b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 = x2 - cp->mIradius; 3447b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3457b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3467b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 347d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams while(x2 > x1) { 348b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneHU4(info, out, x1, buf, cp->mFp, cp->mIradius); 349d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out++; 350d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1++; 351d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 352d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 353d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 354b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelDriverInfo *info, 355c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t xstart, uint32_t xend, 3569ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 357c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float buf[4 * 2048]; 358b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)info->usr; 359c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (!cp->mAlloc.get()) { 360c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams ALOGE("Blur executed without input, skipping"); 361c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return; 362c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 363c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 364c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 365709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 366b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *out = (uchar *)info->outPtr[0]; 367c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x1 = xstart; 368c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams uint32_t x2 = xend; 369c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 370074424a4ac5b093331df2c92e7a5bcbfff136b71Jason Sams#if defined(ARCH_ARM_USE_INTRINSICS) 371d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie if (gArchUseSIMD && info->dim.x >= 16) { 372d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie // The specialisation for r<=8 has an awkward prefill case, which is 373d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie // fiddly to resolve, where starting close to the right edge can cause 374d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie // a read beyond the end of input. So avoid that case here. 375d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie if (cp->mIradius > 8 || (info->dim.x - rsMax(0, (int32_t)x1 - 8)) >= 16) { 376d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie rsdIntrinsicBlurU1_K(out, pin + stride * info->current.y, info->dim.x, info->dim.y, 377d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie stride, x1, info->current.y, x2 - x1, cp->mIradius, cp->mIp + cp->mIradius); 378d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie return; 379d708d99149b92fad40fa8e6053ee231afaffe375Simon Hosie } 380446788007efe0a673d0366284026adfa17b36fedSimon Hosie } 381446788007efe0a673d0366284026adfa17b36fedSimon Hosie#endif 382446788007efe0a673d0366284026adfa17b36fedSimon Hosie 383c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams float *fout = (float *)buf; 384b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int y = info->current.y; 385b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross if ((y > cp->mIradius) && (y < ((int)info->dim.y - cp->mIradius -1))) { 386c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams const uchar *pi = pin + (y - cp->mIradius) * stride; 387b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneVFU1(fout, pi, stride, cp->mFp, cp->mIradius * 2 + 1, 0, info->dim.x); 388c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } else { 389d25fef7232a939faaffcdb83a1be28285313c38eJason Sams x1 = 0; 390b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross while(info->dim.x > x1) { 391b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneVU1(info, fout, x1, y, pin, stride, cp->mFp, cp->mIradius); 392c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams fout++; 393c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 394c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 395c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 396c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 397c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1 = xstart; 3987079cd8ddb0874d431446aea305b8c3b10e4f0baJason Sams while ((x1 < x2) && 399099bc262f862cdeb547cf8a78fe9e0e92560f437Tim Murray ((x1 < (uint32_t)cp->mIradius) || (((uintptr_t)out) & 0x3))) { 400b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius); 401c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 402c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 403c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 4047b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 4057b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD) { 4067b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if ((x1 + cp->mIradius) < x2) { 4077b7060c61e4182b29186849c5a857ea5f0898e56Rose, James uint32_t len = x2 - (x1 + cp->mIradius); 4087b7060c61e4182b29186849c5a857ea5f0898e56Rose, James len &= ~3; 4097b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (len > 0) { 4107b7060c61e4182b29186849c5a857ea5f0898e56Rose, James rsdIntrinsicBlurHFU1_K(out, ((float *)buf) - cp->mIradius, cp->mFp, 4117b7060c61e4182b29186849c5a857ea5f0898e56Rose, James cp->mIradius * 2 + 1, x1, x1 + len); 4127b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += len; 4137b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += len; 4147b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4157b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4167b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 4177b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 418c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams while(x2 > x1) { 419b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneHU1(info, out, x1, buf, cp->mFp, cp->mIradius); 420c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams out++; 421c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams x1++; 422c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 423c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams} 424c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 425c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptIntrinsicBlur::RsdCpuScriptIntrinsicBlur(RsdCpuReferenceImpl *ctx, 426c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const Script *s, const Element *e) 427c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) { 428c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams 42944bef6fba6244292b751387f3d6c31cca96c28adChris Wailes mRootPtr = nullptr; 430c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams if (e->getType() == RS_TYPE_UNSIGNED_8) { 431c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams switch (e->getVectorSize()) { 432c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 1: 433c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU1; 434c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 435c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams case 4: 436c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams mRootPtr = &kernelU4; 437c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams break; 438c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 439c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams } 440c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams rsAssert(mRootPtr); 441c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mRadius = 5; 442c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 443c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratch = new void *[mCtx->getThreadCount()]; 444c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mScratchSize = new size_t[mCtx->getThreadCount()]; 44575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratch, 0, sizeof(void *) * mCtx->getThreadCount()); 44675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mScratchSize, 0, sizeof(size_t) * mCtx->getThreadCount()); 447c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams 448709a0978ae141198018ca9769f8d96292a8928e6Jason Sams ComputeGaussianWeights(); 449709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 450709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 451709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicBlur::~RsdCpuScriptIntrinsicBlur() { 452c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams uint32_t threads = mCtx->getThreadCount(); 453c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch) { 454c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams for (size_t i = 0; i < threads; i++) { 455c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratch[i]) { 456c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams free(mScratch[i]); 457c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 458c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 459c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratch; 460c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 461c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams if (mScratchSize) { 462c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams delete []mScratchSize; 463c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams } 464709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 465709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 466709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::populateScript(Script *s) { 467709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->mHal.info.exportedVariableCount = 2; 468709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 469709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 470709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicBlur::invokeFreeChildren() { 471c44d6706868749abe37780fc28b2cc627ddcf269Jason Sams mAlloc.clear(); 472709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 473709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 474d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 475c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 476d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 477c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return new RsdCpuScriptIntrinsicBlur(ctx, s, e); 478d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 479