1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuIntrinsic.h"
18#include "rsCpuIntrinsicInlines.h"
19
20using namespace android;
21using namespace android::renderscript;
22
23namespace android {
24namespace renderscript {
25
26
27class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
28public:
29    virtual void populateScript(Script *);
30    virtual void invokeFreeChildren();
31
32    virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
33    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
34
35    virtual ~RsdCpuScriptIntrinsicHistogram();
36    RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
37
38protected:
39    void preLaunch(uint32_t slot, const Allocation * ain,
40                   Allocation * aout, const void * usr,
41                   uint32_t usrLen, const RsScriptCall *sc);
42    void postLaunch(uint32_t slot, const Allocation * ain,
43                    Allocation * aout, const void * usr,
44                    uint32_t usrLen, const RsScriptCall *sc);
45
46
47    float mDot[4];
48    int mDotI[4];
49    int *mSums;
50    ObjectBaseRef<Allocation> mAllocOut;
51
52    static void kernelP1U4(const RsForEachStubParamStruct *p,
53                          uint32_t xstart, uint32_t xend,
54                          uint32_t instep, uint32_t outstep);
55    static void kernelP1U3(const RsForEachStubParamStruct *p,
56                          uint32_t xstart, uint32_t xend,
57                          uint32_t instep, uint32_t outstep);
58    static void kernelP1U2(const RsForEachStubParamStruct *p,
59                          uint32_t xstart, uint32_t xend,
60                          uint32_t instep, uint32_t outstep);
61    static void kernelP1L(const RsForEachStubParamStruct *p,
62                          uint32_t xstart, uint32_t xend,
63                          uint32_t instep, uint32_t outstep);
64    static void kernelP1U1(const RsForEachStubParamStruct *p,
65                          uint32_t xstart, uint32_t xend,
66                          uint32_t instep, uint32_t outstep);
67
68};
69
70}
71}
72
73void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
74    rsAssert(slot == 1);
75    mAllocOut.set(static_cast<Allocation *>(data));
76}
77
78void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
79    rsAssert(slot == 0);
80    rsAssert(dataLength == 16);
81    memcpy(mDot, data, 16);
82    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
83    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
84    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
85    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
86}
87
88
89
90void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
91                                      Allocation * aout, const void * usr,
92                                      uint32_t usrLen, const RsScriptCall *sc) {
93
94    const uint32_t threads = mCtx->getThreadCount();
95    const uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
96
97    switch (slot) {
98    case 0:
99        switch(mAllocOut->getType()->getElement()->getVectorSize()) {
100        case 1:
101            mRootPtr = &kernelP1U1;
102            break;
103        case 2:
104            mRootPtr = &kernelP1U2;
105            break;
106        case 3:
107            mRootPtr = &kernelP1U3;
108            break;
109        case 4:
110            mRootPtr = &kernelP1U4;
111            break;
112        }
113        break;
114    case 1:
115        mRootPtr = &kernelP1L;
116        break;
117    }
118    memset(mSums, 0, 256 * 4 * threads * vSize);
119}
120
121void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
122                                       Allocation * aout, const void * usr,
123                                       uint32_t usrLen, const RsScriptCall *sc) {
124
125    unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
126    uint32_t threads = mCtx->getThreadCount();
127    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
128
129    for (uint32_t ct=0; ct < (256 * vSize); ct++) {
130        o[ct] = mSums[ct];
131        for (uint32_t t=1; t < threads; t++) {
132            o[ct] += mSums[ct + 256 * vSize];
133        }
134    }
135}
136
137void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
138                                                uint32_t xstart, uint32_t xend,
139                                                uint32_t instep, uint32_t outstep) {
140
141    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
142    uchar *in = (uchar *)p->in;
143    int * sums = &cp->mSums[256 * 4 * p->lid];
144
145    for (uint32_t x = xstart; x < xend; x++) {
146        sums[(in[0] << 2)    ] ++;
147        sums[(in[1] << 2) + 1] ++;
148        sums[(in[2] << 2) + 2] ++;
149        sums[(in[3] << 2) + 3] ++;
150        in += 4;
151    }
152}
153
154void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
155                                                uint32_t xstart, uint32_t xend,
156                                                uint32_t instep, uint32_t outstep) {
157
158    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
159    uchar *in = (uchar *)p->in;
160    int * sums = &cp->mSums[256 * 4 * p->lid];
161
162    for (uint32_t x = xstart; x < xend; x++) {
163        sums[(in[0] << 2)    ] ++;
164        sums[(in[1] << 2) + 1] ++;
165        sums[(in[2] << 2) + 2] ++;
166        in += 4;
167    }
168}
169
170void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
171                                                uint32_t xstart, uint32_t xend,
172                                                uint32_t instep, uint32_t outstep) {
173
174    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
175    uchar *in = (uchar *)p->in;
176    int * sums = &cp->mSums[256 * 2 * p->lid];
177
178    for (uint32_t x = xstart; x < xend; x++) {
179        sums[(in[0] << 2)    ] ++;
180        sums[(in[1] << 2) + 1] ++;
181        in += 2;
182    }
183}
184
185void RsdCpuScriptIntrinsicHistogram::kernelP1L(const RsForEachStubParamStruct *p,
186                                               uint32_t xstart, uint32_t xend,
187                                               uint32_t instep, uint32_t outstep) {
188
189    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
190    uchar *in = (uchar *)p->in;
191    int * sums = &cp->mSums[256 * p->lid];
192
193    for (uint32_t x = xstart; x < xend; x++) {
194        int t = (cp->mDotI[0] * in[0]) +
195                (cp->mDotI[1] * in[1]) +
196                (cp->mDotI[2] * in[2]) +
197                (cp->mDotI[3] * in[3]);
198        sums[t >> 8] ++;
199        in += 4;
200    }
201}
202
203void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
204                                                uint32_t xstart, uint32_t xend,
205                                                uint32_t instep, uint32_t outstep) {
206
207}
208
209
210RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
211                                                     const Script *s, const Element *e)
212            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLUR) {
213
214    mRootPtr = NULL;
215    mSums = new int[256 * 4 * mCtx->getThreadCount()];
216    mDot[0] = 0.299f;
217    mDot[1] = 0.587f;
218    mDot[2] = 0.114f;
219    mDot[3] = 0;
220    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
221    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
222    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
223    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
224}
225
226RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
227    if (mSums) {
228        delete []mSums;
229    }
230}
231
232void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
233    s->mHal.info.exportedVariableCount = 2;
234}
235
236void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
237}
238
239
240RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
241
242    return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
243}
244
245
246