1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "rsCpuIntrinsic.h"
18#include "rsCpuIntrinsicInlines.h"
19
20namespace android {
21namespace renderscript {
22
23
24class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
25public:
26    void populateScript(Script *) override;
27    void invokeFreeChildren() override;
28
29    void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
30    void setGlobalObj(uint32_t slot, ObjectBase *data) override;
31
32    ~RsdCpuScriptIntrinsicHistogram() override;
33    RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36    void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
37                   Allocation * aout, const void * usr,
38                   uint32_t usrLen, const RsScriptCall *sc);
39    void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
40                    Allocation * aout, const void * usr,
41                    uint32_t usrLen, const RsScriptCall *sc);
42
43
44    float mDot[4];
45    int mDotI[4];
46    int *mSums;
47    ObjectBaseRef<Allocation> mAllocOut;
48
49    static void kernelP1U4(const RsExpandKernelDriverInfo *info,
50                           uint32_t xstart, uint32_t xend,
51                           uint32_t outstep);
52    static void kernelP1U3(const RsExpandKernelDriverInfo *info,
53                           uint32_t xstart, uint32_t xend,
54                           uint32_t outstep);
55    static void kernelP1U2(const RsExpandKernelDriverInfo *info,
56                           uint32_t xstart, uint32_t xend,
57                           uint32_t outstep);
58    static void kernelP1U1(const RsExpandKernelDriverInfo *info,
59                           uint32_t xstart, uint32_t xend,
60                           uint32_t outstep);
61
62    static void kernelP1L4(const RsExpandKernelDriverInfo *info,
63                           uint32_t xstart, uint32_t xend,
64                           uint32_t outstep);
65    static void kernelP1L3(const RsExpandKernelDriverInfo *info,
66                           uint32_t xstart, uint32_t xend,
67                           uint32_t outstep);
68    static void kernelP1L2(const RsExpandKernelDriverInfo *info,
69                           uint32_t xstart, uint32_t xend,
70                           uint32_t outstep);
71    static void kernelP1L1(const RsExpandKernelDriverInfo *info,
72                           uint32_t xstart, uint32_t xend,
73                           uint32_t outstep);
74
75};
76
77void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
78    rsAssert(slot == 1);
79    mAllocOut.set(static_cast<Allocation *>(data));
80}
81
82void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
83    rsAssert(slot == 0);
84    rsAssert(dataLength == 16);
85    memcpy(mDot, data, 16);
86    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
87    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
88    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
89    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
90}
91
92
93
94void
95RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
96                                          const Allocation ** ains,
97                                          uint32_t inLen, Allocation * aout,
98                                          const void * usr, uint32_t usrLen,
99                                          const RsScriptCall *sc) {
100
101    const uint32_t threads = mCtx->getThreadCount();
102    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
103
104    switch (slot) {
105    case 0:
106        switch(vSize) {
107        case 1:
108            mRootPtr = &kernelP1U1;
109            break;
110        case 2:
111            mRootPtr = &kernelP1U2;
112            break;
113        case 3:
114            mRootPtr = &kernelP1U3;
115            vSize = 4;
116            break;
117        case 4:
118            mRootPtr = &kernelP1U4;
119            break;
120        }
121        break;
122    case 1:
123        switch(ains[0]->getType()->getElement()->getVectorSize()) {
124        case 1:
125            mRootPtr = &kernelP1L1;
126            break;
127        case 2:
128            mRootPtr = &kernelP1L2;
129            break;
130        case 3:
131            mRootPtr = &kernelP1L3;
132            break;
133        case 4:
134            mRootPtr = &kernelP1L4;
135            break;
136        }
137        break;
138    }
139    memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
140}
141
142void
143RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
144                                           const Allocation ** ains,
145                                           uint32_t inLen,  Allocation * aout,
146                                           const void * usr, uint32_t usrLen,
147                                           const RsScriptCall *sc) {
148
149    unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
150    uint32_t threads = mCtx->getThreadCount();
151    uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
152
153    if (vSize == 3) vSize = 4;
154
155    for (uint32_t ct=0; ct < (256 * vSize); ct++) {
156        o[ct] = mSums[ct];
157        for (uint32_t t=1; t < threads; t++) {
158            o[ct] += mSums[ct + (256 * vSize * t)];
159        }
160    }
161}
162
163void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
164                                                uint32_t xstart, uint32_t xend,
165                                                uint32_t outstep) {
166
167    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
168    uchar *in = (uchar *)info->inPtr[0];
169    int * sums = &cp->mSums[256 * 4 * info->lid];
170
171    for (uint32_t x = xstart; x < xend; x++) {
172        sums[(in[0] << 2)    ] ++;
173        sums[(in[1] << 2) + 1] ++;
174        sums[(in[2] << 2) + 2] ++;
175        sums[(in[3] << 2) + 3] ++;
176        in += info->inStride[0];
177    }
178}
179
180void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
181                                                uint32_t xstart, uint32_t xend,
182                                                uint32_t outstep) {
183
184    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
185    uchar *in = (uchar *)info->inPtr[0];
186    int * sums = &cp->mSums[256 * 4 * info->lid];
187
188    for (uint32_t x = xstart; x < xend; x++) {
189        sums[(in[0] << 2)    ] ++;
190        sums[(in[1] << 2) + 1] ++;
191        sums[(in[2] << 2) + 2] ++;
192        in += info->inStride[0];
193    }
194}
195
196void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
197                                                uint32_t xstart, uint32_t xend,
198                                                uint32_t outstep) {
199
200    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
201    uchar *in = (uchar *)info->inPtr[0];
202    int * sums = &cp->mSums[256 * 2 * info->lid];
203
204    for (uint32_t x = xstart; x < xend; x++) {
205        sums[(in[0] << 1)    ] ++;
206        sums[(in[1] << 1) + 1] ++;
207        in += info->inStride[0];
208    }
209}
210
211void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
212                                                uint32_t xstart, uint32_t xend,
213                                                uint32_t outstep) {
214
215    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
216    uchar *in = (uchar *)info->inPtr[0];
217    int * sums = &cp->mSums[256 * info->lid];
218
219    for (uint32_t x = xstart; x < xend; x++) {
220        int t = (cp->mDotI[0] * in[0]) +
221                (cp->mDotI[1] * in[1]) +
222                (cp->mDotI[2] * in[2]) +
223                (cp->mDotI[3] * in[3]);
224        sums[(t + 0x7f) >> 8] ++;
225        in += info->inStride[0];
226    }
227}
228
229void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
230                                                uint32_t xstart, uint32_t xend,
231                                                uint32_t outstep) {
232
233    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
234    uchar *in = (uchar *)info->inPtr[0];
235    int * sums = &cp->mSums[256 * info->lid];
236
237    for (uint32_t x = xstart; x < xend; x++) {
238        int t = (cp->mDotI[0] * in[0]) +
239                (cp->mDotI[1] * in[1]) +
240                (cp->mDotI[2] * in[2]);
241        sums[(t + 0x7f) >> 8] ++;
242        in += info->inStride[0];
243    }
244}
245
246void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
247                                                uint32_t xstart, uint32_t xend,
248                                                uint32_t outstep) {
249
250    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
251    uchar *in = (uchar *)info->inPtr[0];
252    int * sums = &cp->mSums[256 * info->lid];
253
254    for (uint32_t x = xstart; x < xend; x++) {
255        int t = (cp->mDotI[0] * in[0]) +
256                (cp->mDotI[1] * in[1]);
257        sums[(t + 0x7f) >> 8] ++;
258        in += info->inStride[0];
259    }
260}
261
262void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
263                                                uint32_t xstart, uint32_t xend,
264                                                uint32_t outstep) {
265
266    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
267    uchar *in = (uchar *)info->inPtr[0];
268    int * sums = &cp->mSums[256 * info->lid];
269
270    for (uint32_t x = xstart; x < xend; x++) {
271        int t = (cp->mDotI[0] * in[0]);
272        sums[(t + 0x7f) >> 8] ++;
273        in += info->inStride[0];
274    }
275}
276
277void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
278                                                uint32_t xstart, uint32_t xend,
279                                                uint32_t outstep) {
280
281    RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
282    uchar *in = (uchar *)info->inPtr[0];
283    int * sums = &cp->mSums[256 * info->lid];
284
285    for (uint32_t x = xstart; x < xend; x++) {
286        sums[in[0]] ++;
287        in += info->inStride[0];
288    }
289}
290
291
292RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
293                                                     const Script *s, const Element *e)
294            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
295
296    mRootPtr = nullptr;
297    mSums = new int[256 * 4 * mCtx->getThreadCount()];
298    mDot[0] = 0.299f;
299    mDot[1] = 0.587f;
300    mDot[2] = 0.114f;
301    mDot[3] = 0;
302    mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
303    mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
304    mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
305    mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
306}
307
308RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
309    if (mSums) {
310        delete []mSums;
311    }
312}
313
314void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
315    s->mHal.info.exportedVariableCount = 2;
316}
317
318void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
319}
320
321RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
322
323    return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
324}
325
326} // namespace renderscript
327} // namespace android
328