12282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams/* 22282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Copyright (C) 2013 The Android Open Source Project 32282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 42282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Licensed under the Apache License, Version 2.0 (the "License"); 52282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * you may not use this file except in compliance with the License. 62282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * You may obtain a copy of the License at 72282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 82282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * http://www.apache.org/licenses/LICENSE-2.0 92282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * 102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * Unless required by applicable law or agreed to in writing, software 112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * distributed under the License is distributed on an "AS IS" BASIS, 122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * See the License for the specific language governing permissions and 142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams * limitations under the License. 152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams */ 162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsic.h" 182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsicInlines.h" 192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace android { 212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace renderscript { 222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsclass RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samspublic: 26c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void populateScript(Script *) override; 27c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void invokeFreeChildren() override; 282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 29c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override; 30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines void setGlobalObj(uint32_t slot, ObjectBase *data) override; 312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines ~RsdCpuScriptIntrinsicHistogram() override; 332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsprotected: 36f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, 372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 39f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, 402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams Allocation * aout, const void * usr, 412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t usrLen, const RsScriptCall *sc); 422282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams float mDot[4]; 452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int mDotI[4]; 462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams int *mSums; 472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams ObjectBaseRef<Allocation> mAllocOut; 482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 49b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1U4(const RsExpandKernelDriverInfo *info, 509ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 519ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 52b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1U3(const RsExpandKernelDriverInfo *info, 539ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1U2(const RsExpandKernelDriverInfo *info, 569ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 579ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 58b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1U1(const RsExpandKernelDriverInfo *info, 599ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t xstart, uint32_t xend, 609ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 62b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1L4(const RsExpandKernelDriverInfo *info, 6375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 649ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1L3(const RsExpandKernelDriverInfo *info, 6675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 679ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 68b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1L2(const RsExpandKernelDriverInfo *info, 6975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 709ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross static void kernelP1L1(const RsExpandKernelDriverInfo *info, 7275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 739ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep); 7475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 752282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}; 762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 772282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 1); 792282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mAllocOut.set(static_cast<Allocation *>(data)); 802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(slot == 0); 842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams rsAssert(dataLength == 16); 852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams memcpy(mDot, data, 16); 862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 94f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid 95f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, 96f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation ** ains, 97f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen, Allocation * aout, 98f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const void * usr, uint32_t usrLen, 99f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const RsScriptCall *sc) { 1002282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const uint32_t threads = mCtx->getThreadCount(); 10275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 1032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams switch (slot) { 1052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 0: 10675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams switch(vSize) { 107b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 1: 1082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U1; 109b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 110b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 2: 111b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U2; 112b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 113b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 3: 114b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams mRootPtr = &kernelP1U3; 11575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams vSize = 4; 116b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 117b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams case 4: 1182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mRootPtr = &kernelP1U4; 119b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams break; 1202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams case 1: 123f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes switch(ains[0]->getType()->getElement()->getVectorSize()) { 12475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 1: 12575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L1; 12675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 12775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 2: 12875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L2; 12975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 3: 13175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L3; 13275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams case 4: 13475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams mRootPtr = &kernelP1L4; 13575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams break; 13675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 1372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams break; 1382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 13975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); 1402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 142f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid 143f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, 144f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const Allocation ** ains, 145f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes uint32_t inLen, Allocation * aout, 146f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const void * usr, uint32_t usrLen, 147f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes const RsScriptCall *sc) { 1482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1492282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 1502282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t threads = mCtx->getThreadCount(); 1512282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 1522282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 15375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams if (vSize == 3) vSize = 4; 15475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 1552282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t ct=0; ct < (256 * vSize); ct++) { 1562282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams o[ct] = mSums[ct]; 1572282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t t=1; t < threads; t++) { 1586de1d8375526502b468232d77fce3e957c705137Tim Murray o[ct] += mSums[ct + (256 * vSize * t)]; 1592282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1602282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 1612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 1622282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 163b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info, 1642282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 1659ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 1662282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 167b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 168b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 169b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * 4 * info->lid]; 1702282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 1712282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 172b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 173b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 174b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 175b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[3] << 2) + 3] ++; 176b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 177b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 178b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 179b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 180b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info, 181b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 1829ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 183b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 184b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 185b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 186b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * 4 * info->lid]; 187b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 188b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 189b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[0] << 2) ] ++; 190b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[1] << 2) + 1] ++; 191b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams sums[(in[2] << 2) + 2] ++; 192b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 193b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams } 194b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams} 195b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 196b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info, 197b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams uint32_t xstart, uint32_t xend, 1989ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 199b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 200b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 201b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 202b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * 2 * info->lid]; 203b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams 204b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams for (uint32_t x = xstart; x < xend; x++) { 20575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(in[0] << 1) ] ++; 20675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(in[1] << 1) + 1] ++; 207b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 2082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 211b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info, 21275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2139ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 2142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 215b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 216b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 217b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * info->lid]; 2182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams for (uint32_t x = xstart; x < xend; x++) { 220b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams int t = (cp->mDotI[0] * in[0]) + 221b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[1] * in[1]) + 222b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[2] * in[2]) + 223b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams (cp->mDotI[3] * in[3]); 22475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 225b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 22675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 22775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 22875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 229b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info, 23075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2319ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 23275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 233b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 234b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 235b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * info->lid]; 23675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 23775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 23875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]) + 23975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[1] * in[1]) + 24075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[2] * in[2]); 24175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 242b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 24375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 24475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 24575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 246b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info, 24775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2489ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 24975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 250b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 251b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 252b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * info->lid]; 25375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 25475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 25575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]) + 25675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams (cp->mDotI[1] * in[1]); 25775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 258b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 25975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 26075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams} 26175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 262b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info, 26375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams uint32_t xstart, uint32_t xend, 2649ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 26575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 266b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 267b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 268b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * info->lid]; 26975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 27075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 27175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams int t = (cp->mDotI[0] * in[0]); 27275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[(t + 0x7f) >> 8] ++; 273b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 2742282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 2752282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 277b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info, 2782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams uint32_t xstart, uint32_t xend, 2799ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes uint32_t outstep) { 2802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 281b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 282b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross uchar *in = (uchar *)info->inPtr[0]; 283b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross int * sums = &cp->mSums[256 * info->lid]; 28475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams 28575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams for (uint32_t x = xstart; x < xend; x++) { 28675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams sums[in[0]] ++; 287b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross in += info->inStride[0]; 28875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams } 2892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 2902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 2922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 2932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams const Script *s, const Element *e) 2946de1d8375526502b468232d77fce3e957c705137Tim Murray : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) { 2952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 29644bef6fba6244292b751387f3d6c31cca96c28adChris Wailes mRootPtr = nullptr; 2972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mSums = new int[256 * 4 * mCtx->getThreadCount()]; 2982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[0] = 0.299f; 2992282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[1] = 0.587f; 3002282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[2] = 0.114f; 3012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDot[3] = 0; 3022282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 3032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 3042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 3052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 3062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 3092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams if (mSums) { 3102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams delete []mSums; 3112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams } 3122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 3152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams s->mHal.info.exportedVariableCount = 2; 3162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 3192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 3202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 3222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams 3232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 3242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams} 325462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh 326462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh} // namespace renderscript 327462de21ac2e1773b99aedee012adb374e476ae36Chih-Hung Hsieh} // namespace android 328