12282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams/* 22282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Copyright (C) 2013 The Android Open Source Project 32282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 42282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 52282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * you may not use this file except in compliance with the License. 62282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * You may obtain a copy of the License at 72282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 82282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * http://www.apache.org/licenses/LICENSE-2.0 92282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Unless required by applicable law or agreed to in writing, software 112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * distributed under the License is distributed on an "AS IS" BASIS, 122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * See the License for the specific language governing permissions and 142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * limitations under the License. 152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams */ 162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsic.h" 182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsicInlines.h" 192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android; 212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android::renderscript; 222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace android { 242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace renderscript { 252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsclass RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samspublic: 292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void populateScript(Script *); 302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void invokeFreeChildren(); 312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual ~RsdCpuScriptIntrinsicHistogram(); 362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsprotected: 392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams void preLaunch(uint32_t slot, const Allocation * ain, 402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 422282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams void postLaunch(uint32_t slot, const Allocation * ain, 432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams float mDot[4]; 482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int mDotI[4]; 492282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int *mSums; 502282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams ObjectBaseRef<Allocation> mAllocOut; 512282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 522282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams static void kernelP1U4(const RsForEachStubParamStruct *p, 532282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 542282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep); 55b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams static void kernelP1U3(const RsForEachStubParamStruct *p, 56b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 57b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t instep, uint32_t outstep); 58b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams static void kernelP1U2(const RsForEachStubParamStruct *p, 59b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 60b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t instep, uint32_t outstep); 612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams static void kernelP1L(const RsForEachStubParamStruct *p, 622282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 632282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep); 642282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams static void kernelP1U1(const RsForEachStubParamStruct *p, 652282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 662282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep); 672282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 682282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}; 692282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 702282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 712282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 722282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 732282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 742282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 1); 752282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mAllocOut.set(static_cast<Allocation *>(data)); 762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 772282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 792282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 0); 802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(dataLength == 16); 812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams memcpy(mDot, data, 16); 822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain, 912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc) { 932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 942282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const uint32_t threads = mCtx->getThreadCount(); 952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams switch (slot) { 982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 0: 99b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams switch(mAllocOut->getType()->getElement()->getVectorSize()) { 100b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 1: 1012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U1; 102b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 103b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 2: 104b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U2; 105b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 106b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 3: 107b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U3; 108b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 109b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 4: 1102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U4; 111b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 1122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 1: 1152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1L; 1162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams memset(mSums, 0, 256 * 4 * threads * vSize); 1192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain, 1222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 1232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc) { 1242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 1262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t threads = mCtx->getThreadCount(); 1272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 1282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t ct=0; ct < (256 * vSize); ct++) { 1302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams o[ct] = mSums[ct]; 1312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t t=1; t < threads; t++) { 1322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams o[ct] += mSums[ct + 256 * vSize]; 1332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p, 1382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 1392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep) { 1402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 142b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uchar *in = (uchar *)p->in; 1432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int * sums = &cp->mSums[256 * 4 * p->lid]; 1442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 146b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 147b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 148b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 149b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[3] << 2) + 3] ++; 150b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams in += 4; 151b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 152b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 153b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 154b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p, 155b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 156b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t instep, uint32_t outstep) { 157b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 158b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 159b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uchar *in = (uchar *)p->in; 160b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int * sums = &cp->mSums[256 * 4 * p->lid]; 161b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 162b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 163b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 164b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 165b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 166b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams in += 4; 167b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 168b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 169b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 170b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p, 171b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 172b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t instep, uint32_t outstep) { 173b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 174b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 175b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uchar *in = (uchar *)p->in; 176b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int * sums = &cp->mSums[256 * 2 * p->lid]; 177b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 178b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 179b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 180b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 181b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams in += 2; 1822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p, 1862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 1872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep) { 1882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 190b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uchar *in = (uchar *)p->in; 1912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int * sums = &cp->mSums[256 * p->lid]; 1922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 194b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int t = (cp->mDotI[0] * in[0]) + 195b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[1] * in[1]) + 196b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[2] * in[2]) + 197b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[3] * in[3]); 1982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams sums[t >> 8] ++; 199b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams in += 4; 2002282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2022282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p, 2042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 2052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t instep, uint32_t outstep) { 2062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 2112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const Script *s, const Element *e) 2122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) { 2132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = NULL; 2152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mSums = new int[256 * 4 * mCtx->getThreadCount()]; 2162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[0] = 0.299f; 2172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[1] = 0.587f; 2182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[2] = 0.114f; 2192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[3] = 0; 2202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 2212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 2222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 2232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 2242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 2272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams if (mSums) { 2282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams delete []mSums; 2292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 2332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams s->mHal.info.exportedVariableCount = 2; 2342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 2372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 2412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2422282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 2432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 246