rsCpuIntrinsicYuvToRGB.cpp revision 4a1495f3404b939459aa817f06ea9325ca1a4625
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21#ifdef RS_COMPATIBILITY_LIB
22#include "rsCompatibilityLib.h"
23#endif
24
25#ifndef RS_COMPATIBILITY_LIB
26#include "hardware/gralloc.h"
27#endif
28
29using namespace android;
30using namespace android::renderscript;
31
32namespace android {
33namespace renderscript {
34
35
36class RsdCpuScriptIntrinsicYuvToRGB : public RsdCpuScriptIntrinsic {
37public:
38    virtual void populateScript(Script *);
39    virtual void invokeFreeChildren();
40
41    virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
42
43    virtual ~RsdCpuScriptIntrinsicYuvToRGB();
44    RsdCpuScriptIntrinsicYuvToRGB(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
45
46protected:
47    ObjectBaseRef<Allocation> alloc;
48
49    static void kernel(const RsForEachStubParamStruct *p,
50                       uint32_t xstart, uint32_t xend,
51                       uint32_t instep, uint32_t outstep);
52};
53
54}
55}
56
57
58void RsdCpuScriptIntrinsicYuvToRGB::setGlobalObj(uint32_t slot, ObjectBase *data) {
59    rsAssert(slot == 0);
60    alloc.set(static_cast<Allocation *>(data));
61}
62
63
64
65
66static uchar4 rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
67    short Y = ((short)y) - 16;
68    short U = ((short)u) - 128;
69    short V = ((short)v) - 128;
70
71    short4 p;
72    p.x = (Y * 298 + V * 409 + 128) >> 8;
73    p.y = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
74    p.z = (Y * 298 + U * 516 + 128) >> 8;
75    p.w = 255;
76    if(p.x < 0) {
77        p.x = 0;
78    }
79    if(p.x > 255) {
80        p.x = 255;
81    }
82    if(p.y < 0) {
83        p.y = 0;
84    }
85    if(p.y > 255) {
86        p.y = 255;
87    }
88    if(p.z < 0) {
89        p.z = 0;
90    }
91    if(p.z > 255) {
92        p.z = 255;
93    }
94
95    return (uchar4){p.x, p.y, p.z, p.w};
96}
97
98
/*
 * Coefficient table passed as the `param` argument to the SIMD kernels.
 *
 * The values mirror the constants used by rsYuvToRGBA_uchar4():
 *   row 0: 298 (luma gain), 409 (V->R), -100 (U->G), 516 (U->B),
 *          -208 (V->G), 255 (alpha), followed by two zero entries
 *   row 1: luma offset 16, repeated 8 times
 *   row 2: chroma offset 128, repeated 8 times
 *   row 3: luma gain 298, repeated 8 times
 *   row 4: alpha 255, repeated 8 times
 *
 * The table is only ever read (every consumer takes a `const short *`), so it
 * is declared const to keep it out of writable data.
 * NOTE(review): the exact layout expected by the assembly kernels is defined
 * outside this file - keep the element order unchanged.
 */
static const short YuvCoeff[] = {
    298, 409, -100, 516,   -208, 255, 0, 0,
    16, 16, 16, 16,        16, 16, 16, 16,
    128, 128, 128, 128,    128, 128, 128, 128,
    298, 298, 298, 298,    298, 298, 298, 298,
    255, 255, 255, 255,    255, 255, 255, 255
};
108
109extern "C" void rsdIntrinsicYuv_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
110extern "C" void rsdIntrinsicYuvR_K(void *dst, const uchar *Y, const uchar *uv, uint32_t count, const short *param);
111extern "C" void rsdIntrinsicYuv2_K(void *dst, const uchar *Y, const uchar *u, const uchar *v, uint32_t count, const short *param);
112
113void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsForEachStubParamStruct *p,
114                                           uint32_t xstart, uint32_t xend,
115                                           uint32_t instep, uint32_t outstep) {
116    RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
117    if (!cp->alloc.get()) {
118        ALOGE("YuvToRGB executed without input, skipping");
119        return;
120    }
121    const uchar *pinY = (const uchar *)cp->alloc->mHal.drvState.lod[0].mallocPtr;
122    if (pinY == NULL) {
123        ALOGE("YuvToRGB executed without data, skipping");
124        return;
125    }
126
127    size_t strideY = cp->alloc->mHal.drvState.lod[0].stride;
128
129    // calculate correct stride in legacy case
130    if (cp->alloc->mHal.drvState.lod[0].dimY == 0) {
131        strideY = p->dimX;
132    }
133    const uchar *Y = pinY + (p->y * strideY);
134
135    uchar4 *out = (uchar4 *)p->out;
136    uint32_t x1 = xstart;
137    uint32_t x2 = xend;
138
139    size_t cstep = cp->alloc->mHal.drvState.yuv.step;
140
141    const uchar *pinU = (const uchar *)cp->alloc->mHal.drvState.lod[1].mallocPtr;
142    const size_t strideU = cp->alloc->mHal.drvState.lod[1].stride;
143    const uchar *u = pinU + ((p->y >> 1) * strideU);
144
145    const uchar *pinV = (const uchar *)cp->alloc->mHal.drvState.lod[2].mallocPtr;
146    const size_t strideV = cp->alloc->mHal.drvState.lod[2].stride;
147    const uchar *v = pinV + ((p->y >> 1) * strideV);
148
149    //ALOGE("pinY, %p, Y, %p, p->y, %d, strideY, %d", pinY, Y, p->y, strideY);
150    //ALOGE("pinU, %p, U, %p, p->y, %d, strideU, %d", pinU, u, p->y, strideU);
151    //ALOGE("pinV, %p, V, %p, p->y, %d, strideV, %d", pinV, v, p->y, strideV);
152    //ALOGE("dimX, %d, dimY, %d", cp->alloc->mHal.drvState.lod[0].dimX, cp->alloc->mHal.drvState.lod[0].dimY);
153    //ALOGE("p->dimX, %d, p->dimY, %d", p->dimX, p->dimY);
154
155    if (pinU == NULL) {
156        // Legacy yuv support didn't fill in uv
157        v = ((uint8_t *)cp->alloc->mHal.drvState.lod[0].mallocPtr) +
158            (strideY * p->dimY) +
159            ((p->y >> 1) * strideY);
160        u = v + 1;
161        cstep = 2;
162    }
163
164#if defined(ARCH_ARM_HAVE_VFP)
165    if((x2 > x1) && gArchUseSIMD) {
166        // The neon paths may over-read by up to 8 bytes
167        int32_t len = (x2 - x1 - 8) >> 3;
168        if(len > 0) {
169            if (cstep == 1) {
170                rsdIntrinsicYuv2_K(out, Y, u, v, len, YuvCoeff);
171                x1 += len << 3;
172                out += len << 3;
173            } else if (cstep == 2) {
174                // Check for proper interleave
175                intptr_t ipu = (intptr_t)u;
176                intptr_t ipv = (intptr_t)v;
177
178                if (ipu == (ipv + 1)) {
179                    rsdIntrinsicYuv_K(out, Y, v, len, YuvCoeff);
180                    x1 += len << 3;
181                    out += len << 3;
182                } else if (ipu == (ipv - 1)) {
183                    rsdIntrinsicYuvR_K(out, Y, u, len, YuvCoeff);
184                    x1 += len << 3;
185                    out += len << 3;
186                }
187
188            }
189        }
190    }
191#endif
192
193    if(x2 > x1) {
194       // ALOGE("y %i  %i  %i", p->y, x1, x2);
195        while(x1 < x2) {
196            int cx = (x1 >> 1) * cstep;
197            *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
198            out++;
199            x1++;
200            *out = rsYuvToRGBA_uchar4(Y[x1], u[cx], v[cx]);
201            out++;
202            x1++;
203        }
204    }
205
206}
207
208RsdCpuScriptIntrinsicYuvToRGB::RsdCpuScriptIntrinsicYuvToRGB(
209            RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
210            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB) {
211
212    mRootPtr = &kernel;
213}
214
215RsdCpuScriptIntrinsicYuvToRGB::~RsdCpuScriptIntrinsicYuvToRGB() {
216}
217
218void RsdCpuScriptIntrinsicYuvToRGB::populateScript(Script *s) {
219    s->mHal.info.exportedVariableCount = 1;
220}
221
222void RsdCpuScriptIntrinsicYuvToRGB::invokeFreeChildren() {
223    alloc.clear();
224}
225
226
227RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx,
228                                         const Script *s, const Element *e) {
229    return new RsdCpuScriptIntrinsicYuvToRGB(ctx, s, e);
230}
231
232
233