1d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams/* 2d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Copyright (C) 2012 The Android Open Source Project 3d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 4d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 5d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * you may not use this file except in compliance with the License. 6d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * You may obtain a copy of the License at 7d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 8d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * http://www.apache.org/licenses/LICENSE-2.0 9d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * 10d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * Unless required by applicable law or agreed to in writing, software 11d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * distributed under the License is distributed on an "AS IS" BASIS, 12d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * See the License for the specific language governing permissions and 14d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams * limitations under the License. 15d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams */ 16d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 17d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 18709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsic.h" 19709a0978ae141198018ca9769f8d96292a8928e6Jason Sams#include "rsCpuIntrinsicInlines.h" 20d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 21d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android; 22d85e283087ecd00760a0d8d0c9d8482cda845efcJason Samsusing namespace android::renderscript; 23d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 24709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace android { 25709a0978ae141198018ca9769f8d96292a8928e6Jason Samsnamespace renderscript { 26709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 27709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 28709a0978ae141198018ca9769f8d96292a8928e6Jason Samsclass RsdCpuScriptIntrinsicConvolve5x5 : public RsdCpuScriptIntrinsic { 29709a0978ae141198018ca9769f8d96292a8928e6Jason Samspublic: 30709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void populateScript(Script *); 31709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void invokeFreeChildren(); 32709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 33709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 34709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 35709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 36709a0978ae141198018ca9769f8d96292a8928e6Jason Sams virtual ~RsdCpuScriptIntrinsicConvolve5x5(); 37c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuScriptIntrinsicConvolve5x5(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 38709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 39709a0978ae141198018ca9769f8d96292a8928e6Jason Samsprotected: 4034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float mFp[28]; 4134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams short mIp[28]; 42d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams ObjectBaseRef<Allocation> alloc; 43709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 44709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 4534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelU1(const RsForEachStubParamStruct *p, 4634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 4734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 4834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelU2(const RsForEachStubParamStruct *p, 4934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 5134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelU4(const RsForEachStubParamStruct *p, 5234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 5434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelF1(const RsForEachStubParamStruct *p, 5534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 5734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelF2(const RsForEachStubParamStruct *p, 5834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 5934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 6034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams static void kernelF4(const RsForEachStubParamStruct *p, 6134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 6234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep); 63709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 64709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 65d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams}; 66d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 67709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 68d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 69d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 70709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::setGlobalObj(uint32_t slot, ObjectBase *data) { 71709a0978ae141198018ca9769f8d96292a8928e6Jason Sams rsAssert(slot == 1); 72709a0978ae141198018ca9769f8d96292a8928e6Jason Sams alloc.set(static_cast<Allocation *>(data)); 73709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 74d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 75709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::setGlobalVar(uint32_t slot, 76709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const void *data, size_t dataLength) { 77d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams rsAssert(slot == 0); 7834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams memcpy (&mFp, data, dataLength); 79d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams for(int ct=0; ct < 25; ct++) { 8034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (mFp[ct] >= 0) { 8134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f + 0.5f); 8234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } else { 8334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f - 0.5f); 8434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 85d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 86d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 87d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 88d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 8934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneU4(const RsForEachStubParamStruct *p, uint32_t x, uchar4 *out, 9034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar4 *py0, const uchar4 *py1, const uchar4 *py2, const uchar4 *py3, const uchar4 *py4, 9134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 92d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 93d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 94d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 95d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = x; 96d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 97d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 98d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 99d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams float4 px = convert_float4(py0[x0]) * coeff[0] + 100d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x1]) * coeff[1] + 101d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x2]) * coeff[2] + 102d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x3]) * coeff[3] + 103d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py0[x4]) * coeff[4] + 104d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 105d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x0]) * coeff[5] + 106d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x1]) * coeff[6] + 107d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x2]) * coeff[7] + 108d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x3]) * coeff[8] + 109d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py1[x4]) * coeff[9] + 110d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 111d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x0]) * coeff[10] + 112d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x1]) * coeff[11] + 113d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x2]) * coeff[12] + 114d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x3]) * coeff[13] + 115d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py2[x4]) * coeff[14] + 116d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 117d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x0]) * coeff[15] + 118d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x1]) * coeff[16] + 119d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x2]) * coeff[17] + 120d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x3]) * coeff[18] + 121d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py3[x4]) * coeff[19] + 122d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 123d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x0]) * coeff[20] + 124d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x1]) * coeff[21] + 125d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x2]) * coeff[22] + 126d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x3]) * coeff[23] + 127d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams convert_float4(py4[x4]) * coeff[24]; 12834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams px = clamp(px, 0.f, 255.f); 12934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = convert_uchar4(px); 13034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 131d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 13234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneU2(const RsForEachStubParamStruct *p, uint32_t x, uchar2 *out, 13334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py0, const uchar2 *py1, const uchar2 *py2, const uchar2 *py3, const uchar2 *py4, 13434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 13534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 13634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 13734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 13834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 13934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 14034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 14134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 14234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float2 px = convert_float2(py0[x0]) * coeff[0] + 14334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x1]) * coeff[1] + 14434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x2]) * coeff[2] + 14534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x3]) * coeff[3] + 14634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py0[x4]) * coeff[4] + 14734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 14834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x0]) * coeff[5] + 14934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x1]) * coeff[6] + 15034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x2]) * coeff[7] + 15134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x3]) * coeff[8] + 15234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py1[x4]) * coeff[9] + 15334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 15434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x0]) * coeff[10] + 15534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x1]) * coeff[11] + 15634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x2]) * coeff[12] + 15734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x3]) * coeff[13] + 15834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py2[x4]) * coeff[14] + 15934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 16034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x0]) * coeff[15] + 16134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x1]) * coeff[16] + 16234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x2]) * coeff[17] + 16334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x3]) * coeff[18] + 16434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py3[x4]) * coeff[19] + 16534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 16634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x0]) * coeff[20] + 16734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x1]) * coeff[21] + 16834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x2]) * coeff[22] + 16934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x3]) * coeff[23] + 17034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams convert_float2(py4[x4]) * coeff[24]; 171d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams px = clamp(px, 0.f, 255.f); 17234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = convert_uchar2(px); 17334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 17434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 17534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneU1(const RsForEachStubParamStruct *p, uint32_t x, uchar *out, 17634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py0, const uchar *py1, const uchar *py2, const uchar *py3, const uchar *py4, 17734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 17834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 17934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 18034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 18134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 18234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 18334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 18434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 18534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float px = (float)(py0[x0]) * coeff[0] + 18634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x1]) * coeff[1] + 18734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x2]) * coeff[2] + 18834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x3]) * coeff[3] + 18934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py0[x4]) * coeff[4] + 19034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 19134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x0]) * coeff[5] + 19234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x1]) * coeff[6] + 19334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x2]) * coeff[7] + 19434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x3]) * coeff[8] + 19534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py1[x4]) * coeff[9] + 19634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 19734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x0]) * coeff[10] + 19834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x1]) * coeff[11] + 19934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x2]) * coeff[12] + 20034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x3]) * coeff[13] + 20134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py2[x4]) * coeff[14] + 20234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 20334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x0]) * coeff[15] + 20434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x1]) * coeff[16] + 20534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x2]) * coeff[17] + 20634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x3]) * coeff[18] + 20734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py3[x4]) * coeff[19] + 20834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 20934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x0]) * coeff[20] + 21034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x1]) * coeff[21] + 21134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x2]) * coeff[22] + 21234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x3]) * coeff[23] + 21334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams (float)(py4[x4]) * coeff[24]; 21434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams px = clamp(px, 0.f, 255.f); 21534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 21634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 21734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 21834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneF4(const RsForEachStubParamStruct *p, uint32_t x, float4 *out, 21934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py0, const float4 *py1, const float4 *py2, const float4 *py3, const float4 *py4, 22034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 22134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 22234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 22334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 22434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 22534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 22634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 22734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 22834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float4 px = py0[x0] * coeff[0] + 22934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 23034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 23134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 23234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 23334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 23434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 23534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 23634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 23734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 23834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 23934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 24034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 24134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 24234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 24334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 24434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 24534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 24634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 24734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 24834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 24934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 25034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 25134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 25234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 25334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 25434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 25534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 25634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 25734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 25834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 25934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 26034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneF2(const RsForEachStubParamStruct *p, uint32_t x, float2 *out, 26134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py0, const float2 *py1, const float2 *py2, const float2 *py3, const float2 *py4, 26234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 26334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 26434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 26534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 26634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 26734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 26834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 26934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 27034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float2 px = py0[x0] * coeff[0] + 27134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 27234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 27334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 27434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 27534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 27634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 27734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 27834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 27934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 28034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 28134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 28234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 28334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 28434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 28534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 28634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 28734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 28834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 28934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 29034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 29134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 29234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 29334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 29434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 29534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 29634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 29734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 29834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 29934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 300d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 301d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 30234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsstatic void OneF1(const RsForEachStubParamStruct *p, uint32_t x, float *out, 30334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py0, const float *py1, const float *py2, const float *py3, const float *py4, 30434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float* coeff) { 30534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 30634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x0 = rsMax((int32_t)x-2, 0); 30734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = rsMax((int32_t)x-1, 0); 30834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = x; 30934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x3 = rsMin((int32_t)x+1, (int32_t)(p->dimX-1)); 31034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x4 = rsMin((int32_t)x+2, (int32_t)(p->dimX-1)); 31134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 31234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float px = py0[x0] * coeff[0] + 31334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x1] * coeff[1] + 31434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x2] * coeff[2] + 31534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x3] * coeff[3] + 31634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py0[x4] * coeff[4] + 31734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 31834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x0] * coeff[5] + 31934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x1] * coeff[6] + 32034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x2] * coeff[7] + 32134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x3] * coeff[8] + 32234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py1[x4] * coeff[9] + 32334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 32434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x0] * coeff[10] + 32534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x1] * coeff[11] + 32634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x2] * coeff[12] + 32734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x3] * coeff[13] + 32834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py2[x4] * coeff[14] + 32934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 33034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x0] * coeff[15] + 33134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x1] * coeff[16] + 33234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x2] * coeff[17] + 33334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x3] * coeff[18] + 33434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py3[x4] * coeff[19] + 33534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 33634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x0] * coeff[20] + 33734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x1] * coeff[21] + 33834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x2] * coeff[22] + 33934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x3] * coeff[23] + 34034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams py4[x4] * coeff[24]; 34134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams *out = px; 34234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 34334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 34434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 345a1b08e2cacf3891fcd6895422c6124887b75975eJason Samsextern "C" void rsdIntrinsicConvolve5x5_K(void *dst, const void *y0, const void *y1, 346a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams const void *y2, const void *y3, const void *y4, 347a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams const short *coef, uint32_t count); 348a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 34934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsForEachStubParamStruct *p, 35034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 35134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 352709a0978ae141198018ca9769f8d96292a8928e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 353b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams if (!cp->alloc.get()) { 354b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 355b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams return; 356b801b949e286275b5d19a33135235ba68d3a19a9Jason Sams } 357709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 358709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 359d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 360d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 361d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 362d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t y2 = p->y; 363d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 364d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 365d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 366709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py0 = (const uchar4 *)(pin + stride * y0); 367709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py1 = (const uchar4 *)(pin + stride * y1); 368709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py2 = (const uchar4 *)(pin + stride * y2); 369709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py3 = (const uchar4 *)(pin + stride * y3); 370709a0978ae141198018ca9769f8d96292a8928e6Jason Sams const uchar4 *py4 = (const uchar4 *)(pin + stride * y4); 371d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 372d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uchar4 *out = (uchar4 *)p->out; 373d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x1 = xstart; 374d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams uint32_t x2 = xend; 375d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 376a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams while((x1 < x2) && (x1 < 2)) { 37734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 378a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams out++; 379a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams x1++; 380a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams } 381a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 382f5ef8df639ba6363aa5d546e57ce872d04144cb6Jason Sams#if defined(ARCH_ARM_HAVE_VFP) 383f5ef8df639ba6363aa5d546e57ce872d04144cb6Jason Sams if(gArchUseSIMD && ((x1 + 3) < x2)) { 384a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams uint32_t len = (x2 - x1 - 3) >> 1; 38534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->mIp, len); 38634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 38734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 38834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 38934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 39034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 39134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 39234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 39334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 39434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 39534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 39634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 39734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 39834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsForEachStubParamStruct *p, 39934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 40034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 40134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 40234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 40334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 40434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 40534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 40634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 40734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 40834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 40934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 41034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 41134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y2 = p->y; 41234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 41334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 41434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 41534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py0 = (const uchar2 *)(pin + stride * y0); 41634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py1 = (const uchar2 *)(pin + stride * y1); 41734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py2 = (const uchar2 *)(pin + stride * y2); 41834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py3 = (const uchar2 *)(pin + stride * y3); 41934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar2 *py4 = (const uchar2 *)(pin + stride * y4); 42034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 42134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uchar2 *out = (uchar2 *)p->out; 42234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 42334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 42434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 42534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 42634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 42734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 42834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 42934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 43034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 43134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 43234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 43334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 434a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 435a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams out += len << 1; 436a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams x1 += len << 1; 437a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams } 438a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams#endif 439a1b08e2cacf3891fcd6895422c6124887b75975eJason Sams 440d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams while(x1 < x2) { 44134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 442d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams out++; 443d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams x1++; 444d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 445d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 446d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 44734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsForEachStubParamStruct *p, 44834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 44934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 45034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 45134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 45234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 45334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 45434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 45534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 45634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 45734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 45834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 45934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 46034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y2 = p->y; 46134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 46234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 46334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 46434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py0 = (const uchar *)(pin + stride * y0); 46534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py1 = (const uchar *)(pin + stride * y1); 46634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py2 = (const uchar *)(pin + stride * y2); 46734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py3 = (const uchar *)(pin + stride * y3); 46834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *py4 = (const uchar *)(pin + stride * y4); 46934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 47034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uchar *out = (uchar *)p->out; 47134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 47234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 47334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 47434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 47534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 47634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 47734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 47834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 47934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 48034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 48134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 48234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 48334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 48434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 48534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 48634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 48734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 48834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 48934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 49034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneU1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 49134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 49234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 49334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 49434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 49534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 49634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsForEachStubParamStruct *p, 49734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 49834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 49934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 50034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 50134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 50234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 50334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 50434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 50534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 50634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 50734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 50834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 50934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y2 = p->y; 51034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 51134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 51234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 51334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py0 = (const float4 *)(pin + stride * y0); 51434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py1 = (const float4 *)(pin + stride * y1); 51534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py2 = (const float4 *)(pin + stride * y2); 51634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py3 = (const float4 *)(pin + stride * y3); 51734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float4 *py4 = (const float4 *)(pin + stride * y4); 51834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 51934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float4 *out = (float4 *)p->out; 52034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 52134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 52234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 52334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 52434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 52534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 52634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 52734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 52834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 52934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 53034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 53134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 53234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 53334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 53434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 53534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 53634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 53734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 53834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 53934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF4(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 54034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 54134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 54234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 54334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 54434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 54534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsForEachStubParamStruct *p, 54634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 54734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 54834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 54934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 55034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 55134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 55234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 55334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 55434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 55534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 55634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 55734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 55834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y2 = p->y; 55934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 56034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 56134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 56234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py0 = (const float2 *)(pin + stride * y0); 56334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py1 = (const float2 *)(pin + stride * y1); 56434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py2 = (const float2 *)(pin + stride * y2); 56534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py3 = (const float2 *)(pin + stride * y3); 56634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float2 *py4 = (const float2 *)(pin + stride * y4); 56734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 56834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float2 *out = (float2 *)p->out; 56934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 57034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 57134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 57234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 57334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 57434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 57534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 57634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 57734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 57834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 57934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 58034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 58134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 58234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 58334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 58434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 58534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 58634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 58734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 58834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF2(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 58934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 59034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 59134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 59234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 59334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 59434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsForEachStubParamStruct *p, 59534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t xstart, uint32_t xend, 59634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t instep, uint32_t outstep) { 59734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr; 59834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (!cp->alloc.get()) { 59934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams ALOGE("Convolve5x5 executed without input, skipping"); 60034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams return; 60134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 60234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const uchar *pin = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr; 60334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const size_t stride = cp->alloc->mHal.drvState.lod[0].stride; 60434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 60534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y0 = rsMax((int32_t)p->y-2, 0); 60634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y1 = rsMax((int32_t)p->y-1, 0); 60734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y2 = p->y; 60834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y3 = rsMin((int32_t)p->y+1, (int32_t)(p->dimY-1)); 60934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t y4 = rsMin((int32_t)p->y+2, (int32_t)(p->dimY-1)); 61034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 61134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py0 = (const float *)(pin + stride * y0); 61234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py1 = (const float *)(pin + stride * y1); 61334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py2 = (const float *)(pin + stride * y2); 61434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py3 = (const float *)(pin + stride * y3); 61534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams const float *py4 = (const float *)(pin + stride * y4); 61634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 61734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams float *out = (float *)p->out; 61834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x1 = xstart; 61934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t x2 = xend; 62034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 62134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while((x1 < x2) && (x1 < 2)) { 62234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 62334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 62434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 62534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 62634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 62734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#if 0//defined(ARCH_ARM_HAVE_NEON) 62834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if((x1 + 3) < x2) { 62934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams uint32_t len = (x2 - x1 - 3) >> 1; 63034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams rsdIntrinsicConvolve5x5_K(out, py0, py1, py2, py3, py4, cp->ip, len); 63134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out += len << 1; 63234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1 += len << 1; 63334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 63434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams#endif 63534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams 63634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams while(x1 < x2) { 63734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams OneF1(p, x1, out, py0, py1, py2, py3, py4, cp->mFp); 63834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams out++; 63934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams x1++; 64034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 64134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams} 642d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 643709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicConvolve5x5::RsdCpuScriptIntrinsicConvolve5x5( 644c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 645c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5) { 646d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 64734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams if (e->getType() == RS_TYPE_FLOAT_32) { 64834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams switch(e->getVectorSize()) { 64934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 1: 65034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF1; 65134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 65234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 2: 65334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF2; 65434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 65534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 3: 65634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 4: 65734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelF4; 65834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 65934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 66034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } else { 66134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams switch(e->getVectorSize()) { 66234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 1: 66334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU1; 66434b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 66534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 2: 66634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU2; 66734b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 66834b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 3: 66934b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams case 4: 67034b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mRootPtr = &kernelU4; 67134b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams break; 67234b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 67334b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams } 674ce0351debba8dadd1a7af2b3e926de6d787b49afJason Sams for(int ct=0; ct < 25; ct++) { 67534b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mFp[ct] = 1.f / 25.f; 67634b0d3119567b992f0f876a2dffc0161bdcef3e6Jason Sams mIp[ct] = (short)(mFp[ct] * 256.f); 677d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams } 678d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams} 679d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 680709a0978ae141198018ca9769f8d96292a8928e6Jason SamsRsdCpuScriptIntrinsicConvolve5x5::~RsdCpuScriptIntrinsicConvolve5x5() { 681709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 682709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 683709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::populateScript(Script *s) { 684709a0978ae141198018ca9769f8d96292a8928e6Jason Sams s->mHal.info.exportedVariableCount = 2; 685709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 686709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 687709a0978ae141198018ca9769f8d96292a8928e6Jason Samsvoid RsdCpuScriptIntrinsicConvolve5x5::invokeFreeChildren() { 688709a0978ae141198018ca9769f8d96292a8928e6Jason Sams alloc.clear(); 689709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 690709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 691709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 692c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, 693c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams const Script *s, const Element *e) { 694709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 695c905efd76fdcc1b8846b229bf7d991d185a7b4b7Jason Sams return new RsdCpuScriptIntrinsicConvolve5x5(ctx, s, e); 696709a0978ae141198018ca9769f8d96292a8928e6Jason Sams} 697709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 698709a0978ae141198018ca9769f8d96292a8928e6Jason Sams 699d85e283087ecd00760a0d8d0c9d8482cda845efcJason Sams 700