1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/* 2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project 3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License. 6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at 7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * http://www.apache.org/licenses/LICENSE-2.0 9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software 11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and 14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License. 15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */ 16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 17d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsic.h" 19709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsicInlines.h" 20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android; 22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript; 23d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace android { 25709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace renderscript { 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 27709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 28709a0978ae141198018ca9769f8d96292a8928e6Jason Samsclass RsdCpuScriptIntrinsicConvolve5x5 : public RsdCpuScriptIntrinsic { 29709a0978ae141198018ca9769f8d96292a8928e6Jason Samspublic: 30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void populateScript(Script *) override; 31c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void invokeFreeChildren() override; 32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override; 34c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalObj(uint32_t slot, ObjectBase *data) override; 35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 36c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines ~RsdCpuScriptIntrinsicConvolve5x5() override; 37c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicConvolve5x5(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 38709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 39709a0978ae141198018ca9769f8d96292a8928e6Jason Samsprotected: 4034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float mFp[28]; 4134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams short mIp[28]; 42d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams ObjectBaseRef<Allocation> alloc; 43709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 44709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 45b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelU1(const RsExpandKernelDriverInfo *info, 4634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 479ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 48b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelU2(const RsExpandKernelDriverInfo *info, 4934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 509ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 51b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelU4(const RsExpandKernelDriverInfo *info, 5234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 539ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 54b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelF1(const RsExpandKernelDriverInfo *info, 5534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 569ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 57b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelF2(const RsExpandKernelDriverInfo *info, 5834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 599ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 60b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelF4(const RsExpandKernelDriverInfo *info, 6134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 629ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 63709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 64709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}; 66d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 67709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 69d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 70709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::setGlobalObj(uint32_t slot, ObjectBase *data) { 71709a0978ae141198018ca9769f8d96292a8928e6Jason Sams rsAssert(slot == 1); 72709a0978ae141198018ca9769f8d96292a8928e6Jason Sams alloc.set(static_cast<Allocation *>(data)); 73709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 75709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::setGlobalVar(uint32_t slot, 76709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const void *data, size_t dataLength) { 77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 0); 7834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams memcpy (&mFp, data, dataLength); 79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for(int ct=0; ct < 25; ct++) { 8034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (mFp[ct] >= 0) { 8134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f + 0.5f); 8234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } else { 8334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f - 0.5f); 8434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 88d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 89b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneU4(const RsExpandKernelDriverInfo *info, uint32_t x, uchar4 *out, 9034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4, 9134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 92d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 95d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = x; 96b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 97b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 98d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 99d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 px = convert_float4(py0[x0]) * coeff[0] + 100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x1]) * coeff[1] + 101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x2]) * coeff[2] + 102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x3]) * coeff[3] + 103d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x4]) * coeff[4] + 104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 105d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x0]) * coeff[5] + 106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x1]) * coeff[6] + 107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x2]) * coeff[7] + 108d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x3]) * coeff[8] + 109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x4]) * coeff[9] + 110d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 111d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x0]) * coeff[10] + 112d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x1]) * coeff[11] + 113d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x2]) * coeff[12] + 114d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x3]) * coeff[13] + 115d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x4]) * coeff[14] + 116d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 117d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x0]) * coeff[15] + 118d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x1]) * coeff[16] + 119d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x2]) * coeff[17] + 120d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x3]) * coeff[18] + 121d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x4]) * coeff[19] + 122d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 123d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x0]) * coeff[20] + 124d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x1]) * coeff[21] + 125d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x2]) * coeff[22] + 126d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x3]) * coeff[23] + 127d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x4]) * coeff[24]; 1284283f579c424f07bc07c7f075398053eed3f8281Miao Wang px = clamp(px + 0.5f, 0.f, 255.f); 12934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = convert_uchar4(px); 13034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 131d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 132b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneU2(const RsExpandKernelDriverInfo *info, uint32_t x, uchar2 *out, 13334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4, 13434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 13534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 13634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 13734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 13834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 139b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 140b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 14134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 14234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float2 px = convert_float2(py0[x0]) * coeff[0] + 14334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x1]) * coeff[1] + 14434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x2]) * coeff[2] + 14534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x3]) * coeff[3] + 14634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x4]) * coeff[4] + 14734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 14834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x0]) * coeff[5] + 14934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x1]) * coeff[6] + 15034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x2]) * coeff[7] + 15134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x3]) * coeff[8] + 15234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x4]) * coeff[9] + 15334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 15434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x0]) * coeff[10] + 15534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x1]) * coeff[11] + 15634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x2]) * coeff[12] + 15734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x3]) * coeff[13] + 15834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x4]) * coeff[14] + 15934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 16034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x0]) * coeff[15] + 16134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x1]) * coeff[16] + 16234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x2]) * coeff[17] + 16334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x3]) * coeff[18] + 16434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x4]) * coeff[19] + 16534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 16634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x0]) * coeff[20] + 16734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x1]) * coeff[21] + 16834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x2]) * coeff[22] + 16934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x3]) * coeff[23] + 17034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x4]) * coeff[24]; 1714283f579c424f07bc07c7f075398053eed3f8281Miao Wang px = clamp(px + 0.5f, 0.f, 255.f); 17234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = convert_uchar2(px); 17334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 17434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 175b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneU1(const RsExpandKernelDriverInfo *info, uint32_t x, uchar *out, 17634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4, 17734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 17834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 17934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 18034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 18134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 182b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 183b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 18434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 18534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float px = (float)(py0[x0]) * coeff[0] + 18634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x1]) * coeff[1] + 18734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x2]) * coeff[2] + 18834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x3]) * coeff[3] + 18934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x4]) * coeff[4] + 19034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 19134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x0]) * coeff[5] + 19234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x1]) * coeff[6] + 19334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x2]) * coeff[7] + 19434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x3]) * coeff[8] + 19534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x4]) * coeff[9] + 19634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 19734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x0]) * coeff[10] + 19834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x1]) * coeff[11] + 19934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x2]) * coeff[12] + 20034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x3]) * coeff[13] + 20134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x4]) * coeff[14] + 20234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 20334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x0]) * coeff[15] + 20434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x1]) * coeff[16] + 20534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x2]) * coeff[17] + 20634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x3]) * coeff[18] + 20734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x4]) * coeff[19] + 20834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 20934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x0]) * coeff[20] + 21034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x1]) * coeff[21] + 21134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x2]) * coeff[22] + 21234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x3]) * coeff[23] + 21334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x4]) * coeff[24]; 2144283f579c424f07bc07c7f075398053eed3f8281Miao Wang px = clamp(px + 0.5f, 0.f, 255.f); 21534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 21634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 21734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 218b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneF4(const RsExpandKernelDriverInfo *info, uint32_t x, float4 *out, 21934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4, 22034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 22134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 22234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 22334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 22434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 225b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 226b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 22734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 22834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float4 px = py0[x0] * coeff[0] + 22934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 23034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 23134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 23234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 23334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 23434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 23534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 23634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 23734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 23834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 23934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 24034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 24134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 24234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 24334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 24434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 24534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 24634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 24734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 24834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 24934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 25034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 25134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 25234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 25334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 25434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 25534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 25634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 25734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 25834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 25934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 260b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneF2(const RsExpandKernelDriverInfo *info, uint32_t x, float2 *out, 26134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4, 26234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 26334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 26434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 26534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 26634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 267b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 268b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 26934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 27034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float2 px = py0[x0] * coeff[0] + 27134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 27234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 27334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 27434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 27534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 27634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 27734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 27834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 27934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 28034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 28134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 28234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 28334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 28434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 28534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 28634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 28734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 28834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 28934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 29034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 29134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 29234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 29334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 29434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 29534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 29634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 29734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 29834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 29934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 300d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 301d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 302b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossstatic void OneF1(const RsExpandKernelDriverInfo *info, uint32_t x, float *out, 30334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py0, const float *py1, const float *py2, const float *py3, const float *py4, 30434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 30534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 30634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 30734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 30834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 309b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(info->dim.x-1)); 310b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(info->dim.x-1)); 31134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 31234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float px = py0[x0] * coeff[0] + 31334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 31434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 31534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 31634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 31734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 31834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 31934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 32034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 32134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 32234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 32334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 32434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 32534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 32634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 32734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 32834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 32934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 33034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 33134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 33234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 33334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 33434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 33534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 33634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 33734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 33834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 33934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 34034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 34134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 34234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 34334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 34434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 345a1b08e2cacf3891fcd6895422c6124887b75975eJason Samsextern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void *y1, 346a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams const void *y2, const void *y3, const void *y4, 347a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams const short *coef, uint32_t count); 348a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 349b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelDriverInfo *info, 35034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 3519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 352b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 353b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams if (!cp->alloc.get()) { 354b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 355b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams return; 356b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams } 357709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 358709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 359d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 360b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 361b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 362b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 363b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 364b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 365d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 366709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py0 = (const uchar4 *)(pin + stride * y0); 367709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py1 = (const uchar4 *)(pin + stride * y1); 368709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py2 = (const uchar4 *)(pin + stride * y2); 369709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py3 = (const uchar4 *)(pin + stride * y3); 370709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py4 = (const uchar4 *)(pin + stride * y4); 371d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 372b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar4 *out = (uchar4 *)info->outPtr[0]; 373d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = xstart; 374d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = xend; 375d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 376a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams while((x1 < x2) && (x1 < 2)) { 377b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 378a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams out++; 379a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams x1++; 380a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams } 3817b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#if defined(ARCH_X86_HAVE_SSSE3) 3827b7060c61e4182b29186849c5a857ea5f0898e56Rose, James // for x86 SIMD, require minimum of 7 elements (4 for SIMD, 3837b7060c61e4182b29186849c5a857ea5f0898e56Rose, James // 3 for end boundary where x may hit the end boundary) 3847b7060c61e4182b29186849c5a857ea5f0898e56Rose, James if (gArchUseSIMD &&((x1 + 6) < x2)) { 3857b7060c61e4182b29186849c5a857ea5f0898e56Rose, James // subtract 3 for end boundary 3867b7060c61e4182b29186849c5a857ea5f0898e56Rose, James uint32_t len = (x2 - x1 - 3) >> 2; 38745d29c41b1b9805991dcd8557f6d1b70977f5428Yong Chen rsdIntrinsicConvolve5x5_K(out, py0 + x1 - 2, py1 + x1 - 2, py2 + x1 - 2, py3 + x1 - 2, py4 + x1 - 2, cp->mIp, len); 3887b7060c61e4182b29186849c5a857ea5f0898e56Rose, James out += len << 2; 3897b7060c61e4182b29186849c5a857ea5f0898e56Rose, James x1 += len << 2; 3907b7060c61e4182b29186849c5a857ea5f0898e56Rose, James } 3917b7060c61e4182b29186849c5a857ea5f0898e56Rose, James#endif 392a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 393074424a4ac5b093331df2c92e7a5bcbfff136b71Jason Sams#if defined(ARCH_ARM_USE_INTRINSICS) 394f5ef8df639ba6363aa5d546e57ce872d04144cb6Jason Sams if(gArchUseSIMD && ((x1 + 3) < x2)) { 395a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams uint32_t len = (x2 - x1 - 3) >> 1; 396de52a834dbcb2a3196948e7b9f67d395493ea9a4Jason Sams rsdIntrinsicConvolve5x5_K(out, py0 + x1 - 2, py1 + x1 - 2, py2 + x1 - 2, py3 + x1 - 2, py4 + x1 - 2, cp->mIp, len); 39734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 39834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 39934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 40034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 40134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 40234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 403b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 40434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 40534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 40634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 40734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 40834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 409b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelDriverInfo *info, 41034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 4119ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 412b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 41334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 41434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 41534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 41634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 41734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 41834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 41934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 420b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 421b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 422b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 423b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 424b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 42534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 42634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py0 = (const uchar2 *)(pin + stride * y0); 42734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py1 = (const uchar2 *)(pin + stride * y1); 42834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py2 = (const uchar2 *)(pin + stride * y2); 42934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py3 = (const uchar2 *)(pin + stride * y3); 43034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py4 = (const uchar2 *)(pin + stride * y4); 43134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 432b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar2 *out = (uchar2 *)info->outPtr[0]; 43334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 43434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 43534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 43634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 437b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 43834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 43934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 44034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 44134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 44234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 44334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 44434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 445a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 446a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams out += len << 1; 447a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams x1 += len << 1; 448a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams } 449a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams#endif 450a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 451d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams while(x1 < x2) { 452b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 453d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out++; 454d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1++; 455d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 456d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 457d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 458b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelDriverInfo *info, 45934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 4609ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 461b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 46234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 46334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 46434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 46534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 46634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 46734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 46834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 469b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 470b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 471b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 472b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 473b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 47434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 47534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py0 = (const uchar *)(pin + stride * y0); 47634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py1 = (const uchar *)(pin + stride * y1); 47734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py2 = (const uchar *)(pin + stride * y2); 47834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py3 = (const uchar *)(pin + stride * y3); 47934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py4 = (const uchar *)(pin + stride * y4); 48034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 481b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *out = (uchar *)info->outPtr[0]; 48234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 48334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 48434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 48534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 486b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 48734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 48834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 48934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 49034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 49134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 49234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 49334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 49434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 49534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 49634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 49734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 49834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 49934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 50034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 501b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneU1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 50234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 50334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 50434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 50534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 50634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 507b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelDriverInfo *info, 50834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5099ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 510b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 51134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 51234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 51334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 51434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 51534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 51634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 51734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 518b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 519b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 520b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 521b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 522b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 52334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 52434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py0 = (const float4 *)(pin + stride * y0); 52534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py1 = (const float4 *)(pin + stride * y1); 52634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py2 = (const float4 *)(pin + stride * y2); 52734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py3 = (const float4 *)(pin + stride * y3); 52834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py4 = (const float4 *)(pin + stride * y4); 52934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 530b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross float4 *out = (float4 *)info->outPtr[0]; 53134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 53234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 53334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 53434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 535b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 53634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 53734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 53834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 53934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 54034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 54134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 54234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 54334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 54434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 54534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 54634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 54734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 54834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 54934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 550b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF4(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 55134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 55234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 55334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 55434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 55534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 556b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelDriverInfo *info, 55734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5589ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 559b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 56034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 56134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 56234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 56334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 56434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 56534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 56634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 567b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 568b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 569b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 570b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 571b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 57234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 57334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py0 = (const float2 *)(pin + stride * y0); 57434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py1 = (const float2 *)(pin + stride * y1); 57534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py2 = (const float2 *)(pin + stride * y2); 57634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py3 = (const float2 *)(pin + stride * y3); 57734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py4 = (const float2 *)(pin + stride * y4); 57834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 579b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross float2 *out = (float2 *)info->outPtr[0]; 58034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 58134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 58234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 58334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 584b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 58534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 58634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 58734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 58834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 58934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 59034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 59134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 59234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 59334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 59434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 59534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 59634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 59734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 59834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 599b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF2(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 60034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 60134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 60234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 60334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 60434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 605b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelDriverInfo *info, 60634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 6079ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 608b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)info->usr; 60934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 61034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 61134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 61234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 61334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 61434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 61534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 616b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y0 = rsMax((int32_t)info->current.y-2, 0); 617b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y1 = rsMax((int32_t)info->current.y-1, 0); 618b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y2 = info->current.y; 619b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y3 = rsMin((int32_t)info->current.y+1, (int32_t)(info->dim.y-1)); 620b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uint32_t y4 = rsMin((int32_t)info->current.y+2, (int32_t)(info->dim.y-1)); 62134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 62234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py0 = (const float *)(pin + stride * y0); 62334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py1 = (const float *)(pin + stride * y1); 62434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py2 = (const float *)(pin + stride * y2); 62534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py3 = (const float *)(pin + stride * y3); 62634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py4 = (const float *)(pin + stride * y4); 62734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 628b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross float *out = (float *)info->outPtr[0]; 62934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 63034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 63134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 63234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 633b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 63434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 63534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 63634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 63734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 63834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 63934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 64034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 64134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 64234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 64334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 64434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 64534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 64634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 64734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 648b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross OneF1(info, x1, out, py0, py1, py2, py3, py4, cp->mFp); 64934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 65034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 65134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 65234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 653d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 654709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicConvolve5x5::RsdCpuScriptIntrinsicConvolve5x5( 655c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 656c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5) { 657d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 65834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (e->getType() == RS_TYPE_FLOAT_32) { 65934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams switch(e->getVectorSize()) { 66034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 1: 66134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF1; 66234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 66334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 2: 66434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF2; 66534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 66634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 3: 66734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 4: 66834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF4; 66934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 67034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 67134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } else { 67234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams switch(e->getVectorSize()) { 67334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 1: 67434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU1; 67534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 67634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 2: 67734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU2; 67834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 67934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 3: 68034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 4: 68134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU4; 68234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 68334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 68434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 685ce0351debba8dadd1a7af2b3e926de6d787b49afJason Sams for(int ct=0; ct < 25; ct++) { 68634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mFp[ct] = 1.f / 25.f; 68734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f); 688d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 689d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 690d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 691709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicConvolve5x5::~RsdCpuScriptIntrinsicConvolve5x5() { 692709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 693709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 694709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::populateScript(Script *s) { 695709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->mHal.info.exportedVariableCount = 2; 696709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 697709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 698709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::invokeFreeChildren() { 699709a0978ae141198018ca9769f8d96292a8928e6Jason Sams alloc.clear(); 700709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 701709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 702709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 703c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, 704c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const Script *s, const Element *e) { 705709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 706c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return new RsdCpuScriptIntrinsicConvolve5x5(ctx, s, e); 707709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 708