// rsCpuIntrinsicHistogram.cpp revision 9ed79105cc6a8dbfaf959875249f36022cc2c798
12282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams/* 22282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Copyright (C) 2013 The Android Open Source Project 32282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 42282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 52282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * you may not use this file except in compliance with the License. 62282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * You may obtain a copy of the License at 72282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 82282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * http://www.apache.org/licenses/LICENSE-2.0 92282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Unless required by applicable law or agreed to in writing, software 112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * distributed under the License is distributed on an "AS IS" BASIS, 122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * See the License for the specific language governing permissions and 142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * limitations under the License. 
152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams */ 162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsic.h" 182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsicInlines.h" 192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android; 212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android::renderscript; 222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace android { 242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace renderscript { 252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsclass RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samspublic: 292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void populateScript(Script *); 302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void invokeFreeChildren(); 312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams virtual ~RsdCpuScriptIntrinsicHistogram(); 362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsprotected: 39f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t 
inLen, 402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 42f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, 432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams float mDot[4]; 482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int mDotI[4]; 492282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int *mSums; 502282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams ObjectBaseRef<Allocation> mAllocOut; 512282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 5280ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1U4(const RsExpandKernelParams *p, 539ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 5580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1U3(const RsExpandKernelParams *p, 569ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 579ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 5880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1U2(const RsExpandKernelParams *p, 599ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 609ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 6180ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1U1(const RsExpandKernelParams *p, 629ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 
639ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 642282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 6580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1L4(const RsExpandKernelParams *p, 6675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 679ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 6880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1L3(const RsExpandKernelParams *p, 6975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 709ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 7180ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1L2(const RsExpandKernelParams *p, 7275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 739ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 7480ef693674f69c0343c41564e30f80e7fb513b60Chris Wailes static void kernelP1L1(const RsExpandKernelParams *p, 7575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 769ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 7775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}; 792282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 1); 852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mAllocOut.set(static_cast<Allocation *>(data)); 862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid 
RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 0); 902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(dataLength == 16); 912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams memcpy(mDot, data, 16); 922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 942282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 992282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 100f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid 101f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, 102f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation ** ains, 103f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen, Allocation * aout, 104f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const void * usr, uint32_t usrLen, 105f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const RsScriptCall *sc) { 1062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const uint32_t threads = mCtx->getThreadCount(); 10875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 1092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams switch (slot) { 1112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 0: 11275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams switch(vSize) { 
113b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 1: 1142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U1; 115b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 116b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 2: 117b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U2; 118b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 119b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 3: 120b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U3; 12175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams vSize = 4; 122b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 123b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 4: 1242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U4; 125b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 1262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 1: 129f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes switch(ains[0]->getType()->getElement()->getVectorSize()) { 13075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 1: 13175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L1; 13275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 2: 13475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L2; 13575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 3: 13775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L3; 13875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 4: 14075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L4; 14175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 
14275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 1432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 14575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); 1462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 148f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid 149f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, 150f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation ** ains, 151f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen, Allocation * aout, 152f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const void * usr, uint32_t usrLen, 153f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const RsScriptCall *sc) { 1542282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1552282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 1562282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t threads = mCtx->getThreadCount(); 1572282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 1582282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 15975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams if (vSize == 3) vSize = 4; 16075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 1612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t ct=0; ct < (256 * vSize); ct++) { 1622282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams o[ct] = mSums[ct]; 1632282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t t=1; t < threads; t++) { 1646de1d8375526502b468232d77fce3e957c705137Tim Murray o[ct] += mSums[ct + (256 * vSize * t)]; 1652282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1662282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 
1672282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1682282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 16980ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p, 1702282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 1719ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 1722282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1732282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 174f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 1752282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int * sums = &cp->mSums[256 * 4 * p->lid]; 1762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1772282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 178b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 179b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 180b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 181b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[3] << 2) + 3] ++; 182f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 183b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 184b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 185b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 18680ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p, 187b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 1889ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 189b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 190b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 
191f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 192b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int * sums = &cp->mSums[256 * 4 * p->lid]; 193b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 194b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 195b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 196b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 197b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 198f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 199b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 200b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 201b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 20280ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p, 203b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 2049ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 205b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 206b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 207f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 208b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int * sums = &cp->mSums[256 * 2 * p->lid]; 209b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 210b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 21175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(in[0] << 1) ] ++; 21275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(in[1] << 1) + 1] ++; 213f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 2142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 
2162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 21780ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p, 21875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2199ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 2202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 222f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 2232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int * sums = &cp->mSums[256 * p->lid]; 2242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 226b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int t = (cp->mDotI[0] * in[0]) + 227b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[1] * in[1]) + 228b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[2] * in[2]) + 229b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[3] * in[3]); 23075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 231f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 23275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 23375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 23475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 23580ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p, 23675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2379ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 23875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 23975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 
240f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 24175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int * sums = &cp->mSums[256 * p->lid]; 24275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 24375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 24475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]) + 24575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[1] * in[1]) + 24675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[2] * in[2]); 24775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 248f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 24975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 25075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 25175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 25280ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p, 25375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 25575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 25675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 257f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 25875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int * sums = &cp->mSums[256 * p->lid]; 25975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 26075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 26175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]) + 26275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[1] * in[1]); 26375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 
264f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 26575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 26675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 26775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 26880ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p, 26975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2709ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 27175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 27275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 273f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 27475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int * sums = &cp->mSums[256 * p->lid]; 27575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 27675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 27775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]); 27875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 279f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 2802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 28380ef693674f69c0343c41564e30f80e7fb513b60Chris Wailesvoid RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p, 2842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 2859ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 2862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 28775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 
288f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uchar *in = (uchar *)p->ins[0]; 28975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int * sums = &cp->mSums[256 * p->lid]; 29075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 29175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 29275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[in[0]] ++; 293f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes in += p->inEStrides[0]; 29475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 2952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 2992282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const Script *s, const Element *e) 3006de1d8375526502b468232d77fce3e957c705137Tim Murray : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) { 3012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3022282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = NULL; 3032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mSums = new int[256 * 4 * mCtx->getThreadCount()]; 3042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[0] = 0.299f; 3052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[1] = 0.587f; 3062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[2] = 0.114f; 3072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[3] = 0; 3082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 3092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 3102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 3112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 
3122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 3152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams if (mSums) { 3162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams delete []mSums; 3172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 3182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 3212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams s->mHal.info.exportedVariableCount = 2; 3222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 3252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 3292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 3312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 332