/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsic.h"
182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams#include "rsCpuIntrinsicInlines.h"
192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android;
212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsusing namespace android::renderscript;
222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace android {
242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsnamespace renderscript {
252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsclass RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samspublic:
292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    virtual void populateScript(Script *);
302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    virtual void invokeFreeChildren();
312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    virtual ~RsdCpuScriptIntrinsicHistogram();
362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsprotected:
392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    void preLaunch(uint32_t slot, const Allocation * ain,
402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                   Allocation * aout, const void * usr,
412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                   uint32_t usrLen, const RsScriptCall *sc);
422282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    void postLaunch(uint32_t slot, const Allocation * ain,
432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                    Allocation * aout, const void * usr,
442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                    uint32_t usrLen, const RsScriptCall *sc);
452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
462282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
472282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    float mDot[4];
482282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int mDotI[4];
492282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int *mSums;
502282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    ObjectBaseRef<Allocation> mAllocOut;
512282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
522282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    static void kernelP1U4(const RsForEachStubParamStruct *p,
532282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t xstart, uint32_t xend,
542282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t instep, uint32_t outstep);
55b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    static void kernelP1U3(const RsForEachStubParamStruct *p,
56b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                          uint32_t xstart, uint32_t xend,
57b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                          uint32_t instep, uint32_t outstep);
58b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    static void kernelP1U2(const RsForEachStubParamStruct *p,
59b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                          uint32_t xstart, uint32_t xend,
60b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                          uint32_t instep, uint32_t outstep);
612282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    static void kernelP1L(const RsForEachStubParamStruct *p,
622282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t xstart, uint32_t xend,
632282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t instep, uint32_t outstep);
642282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    static void kernelP1U1(const RsForEachStubParamStruct *p,
652282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t xstart, uint32_t xend,
662282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                          uint32_t instep, uint32_t outstep);
672282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
682282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams};
692282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
702282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
712282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
722282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
732282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
742282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(slot == 1);
752282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mAllocOut.set(static_cast<Allocation *>(data));
762282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
772282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
782282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
792282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(slot == 0);
802282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    rsAssert(dataLength == 16);
812282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    memcpy(mDot, data, 16);
822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
902282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                      Allocation * aout, const void * usr,
922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                      uint32_t usrLen, const RsScriptCall *sc) {
932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
942282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    const uint32_t threads = mCtx->getThreadCount();
952282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
962282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
972282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    switch (slot) {
982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    case 0:
99b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        switch(mAllocOut->getType()->getElement()->getVectorSize()) {
100b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 1:
1012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            mRootPtr = &kernelP1U1;
102b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
103b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 2:
104b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            mRootPtr = &kernelP1U2;
105b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
106b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 3:
107b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            mRootPtr = &kernelP1U3;
108b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
109b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        case 4:
1102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            mRootPtr = &kernelP1U4;
111b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams            break;
1122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        }
1132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        break;
1142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    case 1:
1152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        mRootPtr = &kernelP1L;
1162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        break;
1172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
1182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    memset(mSums, 0, 256 * 4 * threads * vSize);
1192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
1202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
1222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                       Allocation * aout, const void * usr,
1232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                       uint32_t usrLen, const RsScriptCall *sc) {
1242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
1262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    uint32_t threads = mCtx->getThreadCount();
1272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
1282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t ct=0; ct < (256 * vSize); ct++) {
1302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        o[ct] = mSums[ct];
1312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        for (uint32_t t=1; t < threads; t++) {
1322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            o[ct] += mSums[ct + 256 * vSize];
1332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        }
1342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
1352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
1362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
1382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t xstart, uint32_t xend,
1392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t instep, uint32_t outstep) {
1402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
142b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    uchar *in = (uchar *)p->in;
1432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int * sums = &cp->mSums[256 * 4 * p->lid];
1442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
146b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[0] << 2)    ] ++;
147b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[1] << 2) + 1] ++;
148b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[2] << 2) + 2] ++;
149b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[3] << 2) + 3] ++;
150b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        in += 4;
151b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    }
152b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams}
153b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
154b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
155b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t xstart, uint32_t xend,
156b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t instep, uint32_t outstep) {
157b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
158b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
159b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    uchar *in = (uchar *)p->in;
160b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    int * sums = &cp->mSums[256 * 4 * p->lid];
161b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
162b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
163b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[0] << 2)    ] ++;
164b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[1] << 2) + 1] ++;
165b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[2] << 2) + 2] ++;
166b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        in += 4;
167b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    }
168b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams}
169b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
170b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
171b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t xstart, uint32_t xend,
172b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                                                uint32_t instep, uint32_t outstep) {
173b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
174b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
175b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    uchar *in = (uchar *)p->in;
176b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    int * sums = &cp->mSums[256 * 2 * p->lid];
177b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams
178b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
179b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[0] << 2)    ] ++;
180b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        sums[(in[1] << 2) + 1] ++;
181b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        in += 2;
1822282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
1832282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
1842282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1852282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p,
1862282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                               uint32_t xstart, uint32_t xend,
1872282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                               uint32_t instep, uint32_t outstep) {
1882282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1892282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
190b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams    uchar *in = (uchar *)p->in;
1912282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    int * sums = &cp->mSums[256 * p->lid];
1922282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
1932282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    for (uint32_t x = xstart; x < xend; x++) {
194b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        int t = (cp->mDotI[0] * in[0]) +
195b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[1] * in[1]) +
196b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[2] * in[2]) +
197b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams                (cp->mDotI[3] * in[3]);
1982282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        sums[t >> 8] ++;
199b68ba7e6f3d1e6440c1409e85a6c3af14fc39ee6Jason Sams        in += 4;
2002282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
2012282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2022282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2032282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
2042282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t xstart, uint32_t xend,
2052282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                uint32_t instep, uint32_t outstep) {
2062282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2072282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2082282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2092282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2102282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
2112282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams                                                     const Script *s, const Element *e)
2122282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) {
2132282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2142282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mRootPtr = NULL;
2152282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mSums = new int[256 * 4 * mCtx->getThreadCount()];
2162282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[0] = 0.299f;
2172282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[1] = 0.587f;
2182282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[2] = 0.114f;
2192282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDot[3] = 0;
2202282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
2212282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
2222282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
2232282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
2242282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2252282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2262282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
2272282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    if (mSums) {
2282282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams        delete []mSums;
2292282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    }
2302282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2312282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2322282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
2332282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    s->mHal.info.exportedVariableCount = 2;
2342282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2352282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2362282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Samsvoid RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
2372282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2382282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2392282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2402282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason SamsRsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
2412282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2422282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams    return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
2432282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams}
2442282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
2452282e2816ac5f5de53f9bd4f3ecbdfd6d756d120Jason Sams
246