/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsic.h"
182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsicInlines.h"
192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android;
212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android::renderscript;
222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace android {
242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace renderscript {
252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsclass RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samspublic:
29c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void populateScript(Script *) override;
30c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void invokeFreeChildren() override;
312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
32c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
33c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    void setGlobalObj(uint32_t slot, ObjectBase *data) override;
342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
35c060f1435e7b9405f3be8974417fa6f410f03753Stephen Hines    ~RsdCpuScriptIntrinsicHistogram() override;
362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsprotected:
39f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes    void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                   Allocation * aout, const void * usr,
412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                   uint32_t usrLen, const RsScriptCall *sc);
42f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes    void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                    Allocation * aout, const void * usr,
442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                    uint32_t usrLen, const RsScriptCall *sc);
452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    float mDot[4];
482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int mDotI[4];
492282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int *mSums;
502282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    ObjectBaseRef<Allocation> mAllocOut;
512282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
52b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1U4(const RsExpandKernelDriverInfo *info,
539ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t xstart, uint32_t xend,
549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
55b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1U3(const RsExpandKernelDriverInfo *info,
569ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t xstart, uint32_t xend,
579ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
58b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1U2(const RsExpandKernelDriverInfo *info,
599ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t xstart, uint32_t xend,
609ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
61b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1U1(const RsExpandKernelDriverInfo *info,
629ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t xstart, uint32_t xend,
639ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
642282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
65b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1L4(const RsExpandKernelDriverInfo *info,
6675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                           uint32_t xstart, uint32_t xend,
679ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
68b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1L3(const RsExpandKernelDriverInfo *info,
6975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                           uint32_t xstart, uint32_t xend,
709ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
71b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1L2(const RsExpandKernelDriverInfo *info,
7275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                           uint32_t xstart, uint32_t xend,
739ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
74b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    static void kernelP1L1(const RsExpandKernelDriverInfo *info,
7575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                           uint32_t xstart, uint32_t xend,
769ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                           uint32_t outstep);
7775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams};
792282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(slot == 1);
852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mAllocOut.set(static_cast<Allocation *>(data));
862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(slot == 0);
902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(dataLength == 16);
912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    memcpy(mDot, data, 16);
922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
942282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
992282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
100f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid
101f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
102f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                          const Allocation ** ains,
103f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                          uint32_t inLen, Allocation * aout,
104f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                          const void * usr, uint32_t usrLen,
105f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                          const RsScriptCall *sc) {
1062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    const uint32_t threads = mCtx->getThreadCount();
10875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
1092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    switch (slot) {
1112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    case 0:
11275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        switch(vSize) {
113b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 1:
1142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            mRootPtr = &kernelP1U1;
115b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
116b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 2:
117b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            mRootPtr = &kernelP1U2;
118b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
119b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 3:
120b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            mRootPtr = &kernelP1U3;
12175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            vSize = 4;
122b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
123b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 4:
1242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            mRootPtr = &kernelP1U4;
125b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
1262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        }
1272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        break;
1282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    case 1:
129f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes        switch(ains[0]->getType()->getElement()->getVectorSize()) {
13075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        case 1:
13175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            mRootPtr = &kernelP1L1;
13275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            break;
13375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        case 2:
13475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            mRootPtr = &kernelP1L2;
13575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            break;
13675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        case 3:
13775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            mRootPtr = &kernelP1L3;
13875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            break;
13975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        case 4:
14075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            mRootPtr = &kernelP1L4;
14175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams            break;
14275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        }
1432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        break;
1442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
14575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
1462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
1472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
148f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailesvoid
149f37121300217d3b39ab66dd9c8881bcbcad932dfChris WailesRsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
150f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                           const Allocation ** ains,
151f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                           uint32_t inLen,  Allocation * aout,
152f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                           const void * usr, uint32_t usrLen,
153f37121300217d3b39ab66dd9c8881bcbcad932dfChris Wailes                                           const RsScriptCall *sc) {
1542282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1552282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
1562282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    uint32_t threads = mCtx->getThreadCount();
1572282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
1582282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
15975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    if (vSize == 3) vSize = 4;
16075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
1612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t ct=0; ct < (256 * vSize); ct++) {
1622282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        o[ct] = mSums[ct];
1632282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        for (uint32_t t=1; t < threads; t++) {
1646de1d8375526502b468232d77fce3e957c705137Tim Murray            o[ct] += mSums[ct + (256 * vSize * t)];
1652282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        }
1662282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
1672282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
1682282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
169b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
1702282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t xstart, uint32_t xend,
1719ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
1722282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
173b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
174b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
175b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * 4 * info->lid];
1762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1772282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
178b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[0] << 2)    ] ++;
179b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[1] << 2) + 1] ++;
180b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[2] << 2) + 2] ++;
181b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[3] << 2) + 3] ++;
182b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
183b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    }
184b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams}
185b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
186b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
187b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t xstart, uint32_t xend,
1889ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
189b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
190b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
191b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
192b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * 4 * info->lid];
193b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
194b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
195b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[0] << 2)    ] ++;
196b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[1] << 2) + 1] ++;
197b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[2] << 2) + 2] ++;
198b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
199b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    }
200b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams}
201b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
202b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
203b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t xstart, uint32_t xend,
2049ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
205b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
206b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
207b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
208b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * 2 * info->lid];
209b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
210b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
21175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(in[0] << 1)    ] ++;
21275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(in[1] << 1) + 1] ++;
213b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
2142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
2152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
217b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
21875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                                                uint32_t xstart, uint32_t xend,
2199ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
2202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
221b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
222b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
223b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * info->lid];
2242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
226b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        int t = (cp->mDotI[0] * in[0]) +
227b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[1] * in[1]) +
228b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[2] * in[2]) +
229b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[3] * in[3]);
23075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(t + 0x7f) >> 8] ++;
231b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
23275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    }
23375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams}
23475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
235b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
23675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                                                uint32_t xstart, uint32_t xend,
2379ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
23875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
239b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
240b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
241b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * info->lid];
24275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
24375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    for (uint32_t x = xstart; x < xend; x++) {
24475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        int t = (cp->mDotI[0] * in[0]) +
24575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                (cp->mDotI[1] * in[1]) +
24675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                (cp->mDotI[2] * in[2]);
24775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(t + 0x7f) >> 8] ++;
248b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
24975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    }
25075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams}
25175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
252b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
25375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                                                uint32_t xstart, uint32_t xend,
2549ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
25575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
256b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
257b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
258b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * info->lid];
25975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
26075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    for (uint32_t x = xstart; x < xend; x++) {
26175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        int t = (cp->mDotI[0] * in[0]) +
26275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                (cp->mDotI[1] * in[1]);
26375adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(t + 0x7f) >> 8] ++;
264b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
26575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    }
26675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams}
26775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
268b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
26975adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams                                                uint32_t xstart, uint32_t xend,
2709ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
27175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
272b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
273b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
274b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * info->lid];
27575adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
27675adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    for (uint32_t x = xstart; x < xend; x++) {
27775adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        int t = (cp->mDotI[0] * in[0]);
27875adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[(t + 0x7f) >> 8] ++;
279b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
2802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
2812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
283b0abb140ac51b93d1a85aadaa63fe057f2d29850David Grossvoid RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
2842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t xstart, uint32_t xend,
2859ed79105cc6a8dbfaf959875249f36022cc2c798Chris Wailes                                                uint32_t outstep) {
2862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
287b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
288b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    uchar *in = (uchar *)info->inPtr[0];
289b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross    int * sums = &cp->mSums[256 * info->lid];
29075adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams
29175adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    for (uint32_t x = xstart; x < xend; x++) {
29275adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams        sums[in[0]] ++;
293b0abb140ac51b93d1a85aadaa63fe057f2d29850David Gross        in += info->inStride[0];
29475adb8213f045bf3ffbc5deb1350b36d486e228aJason Sams    }
2952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
2992282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                     const Script *s, const Element *e)
3006de1d8375526502b468232d77fce3e957c705137Tim Murray            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
3012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
30244bef6fba6244292b751387f3d6c31cca96c28adChris Wailes    mRootPtr = nullptr;
3032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mSums = new int[256 * 4 * mCtx->getThreadCount()];
3042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[0] = 0.299f;
3052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[1] = 0.587f;
3062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[2] = 0.114f;
3072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[3] = 0;
3082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
3092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
3102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
3112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
3122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
3132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
3152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    if (mSums) {
3162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        delete []mSums;
3172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
3182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
3192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
3212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    s->mHal.info.exportedVariableCount = 2;
3222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
3232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
3252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
3262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
3292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
3302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
3312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
332