rsCpuIntrinsicBlend.cpp revision 0b575de8ed0b628d84d256f5846500b0385979bd
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29public:
30    virtual void populateScript(Script *);
31
32    virtual ~RsdCpuScriptIntrinsicBlend();
33    RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36    static void kernel(const RsForEachStubParamStruct *p,
37                          uint32_t xstart, uint32_t xend,
38                          uint32_t instep, uint32_t outstep);
39};
40
41}
42}
43
44
// Blend mode identifiers. kernel() compares p->slot against these values, so
// the numbers must stay exactly as listed.
//
// Modes with a real implementation in kernel(): CLEAR, SRC, DST, SRC_OVER,
// DST_OVER, SRC_IN, DST_IN, SRC_OUT, DST_OUT, SRC_ATOP, DST_ATOP, XOR,
// MULTIPLY, ADD and SUBTRACT. Every other mode only logs an error there.
enum {
    // --- source/destination compositing modes ---
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // --- additional blend modes ---
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
92
// Externally defined, C-linkage blend routines (no definition in this file).
// kernel() only calls them when ARCH_ARM_HAVE_NEON is defined, passing the
// number of whole 8-pixel groups as count8 and then advancing its pointers by
// count8 * 8 pixels. dst is both read and written; src is read-only.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}  // extern "C"
105
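// Debugging aid: uncomment the next line to compile out the
// ARCH_ARM_HAVE_NEON fast paths below so that only the scalar loops run.
//#undef ARCH_ARM_HAVE_NEON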
107
108void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
109                                        uint32_t xstart, uint32_t xend,
110                                        uint32_t instep, uint32_t outstep) {
111    RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
112
113    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
114    uchar4 *out = (uchar4 *)p->out;
115    uchar4 *in = (uchar4 *)p->in;
116    uint32_t x1 = xstart;
117    uint32_t x2 = xend;
118
119    switch (p->slot) {
120    case BLEND_CLEAR:
121        for (;x1 < x2; x1++, out++) {
122            *out = 0;
123        }
124        break;
125    case BLEND_SRC:
126        for (;x1 < x2; x1++, out++, in++) {
127          *out = *in;
128        }
129        break;
130    //BLEND_DST is a NOP
131    case BLEND_DST:
132        break;
133    case BLEND_SRC_OVER:
134#if defined(ARCH_ARM_HAVE_NEON)
135        if((x1 + 8) < x2) {
136            uint32_t len = (x2 - x1) >> 3;
137            rsdIntrinsicBlendSrcOver_K(out, in, len);
138            x1 += len << 3;
139            out += len << 3;
140            in += len << 3;
141        }
142#endif
143        for (;x1 < x2; x1++, out++, in++) {
144            short4 in_s = convert_short4(*in);
145            short4 out_s = convert_short4(*out);
146            in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
147            *out = convert_uchar4(in_s);
148        }
149        break;
150    case BLEND_DST_OVER:
151#if defined(ARCH_ARM_HAVE_NEON)
152        if((x1 + 8) < x2) {
153            uint32_t len = (x2 - x1) >> 3;
154            rsdIntrinsicBlendDstOver_K(out, in, len);
155            x1 += len << 3;
156            out += len << 3;
157            in += len << 3;
158        }
159#endif
160        for (;x1 < x2; x1++, out++, in++) {
161            short4 in_s = convert_short4(*in);
162            short4 out_s = convert_short4(*out);
163            in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
164            *out = convert_uchar4(in_s);
165        }
166        break;
167    case BLEND_SRC_IN:
168#if defined(ARCH_ARM_HAVE_NEON)
169        if((x1 + 8) < x2) {
170            uint32_t len = (x2 - x1) >> 3;
171            rsdIntrinsicBlendSrcIn_K(out, in, len);
172            x1 += len << 3;
173            out += len << 3;
174            in += len << 3;
175        }
176#endif
177        for (;x1 < x2; x1++, out++, in++) {
178            short4 in_s = convert_short4(*in);
179            in_s = (in_s * out->w) >> (short4)8;
180            *out = convert_uchar4(in_s);
181        }
182        break;
183    case BLEND_DST_IN:
184#if defined(ARCH_ARM_HAVE_NEON)
185        if((x1 + 8) < x2) {
186            uint32_t len = (x2 - x1) >> 3;
187            rsdIntrinsicBlendDstIn_K(out, in, len);
188            x1 += len << 3;
189            out += len << 3;
190            in += len << 3;
191        }
192#endif
193        for (;x1 < x2; x1++, out++, in++) {
194            short4 out_s = convert_short4(*out);
195            out_s = (out_s * in->w) >> (short4)8;
196            *out = convert_uchar4(out_s);
197        }
198        break;
199    case BLEND_SRC_OUT:
200#if defined(ARCH_ARM_HAVE_NEON)
201        if((x1 + 8) < x2) {
202            uint32_t len = (x2 - x1) >> 3;
203            rsdIntrinsicBlendSrcOut_K(out, in, len);
204            x1 += len << 3;
205            out += len << 3;
206            in += len << 3;
207        }
208#endif
209        for (;x1 < x2; x1++, out++, in++) {
210            short4 in_s = convert_short4(*in);
211            in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
212            *out = convert_uchar4(in_s);
213        }
214        break;
215    case BLEND_DST_OUT:
216#if defined(ARCH_ARM_HAVE_NEON)
217        if((x1 + 8) < x2) {
218            uint32_t len = (x2 - x1) >> 3;
219            rsdIntrinsicBlendDstOut_K(out, in, len);
220            x1 += len << 3;
221            out += len << 3;
222            in += len << 3;
223        }
224#endif
225        for (;x1 < x2; x1++, out++, in++) {
226            short4 out_s = convert_short4(*out);
227            out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
228            *out = convert_uchar4(out_s);
229        }
230        break;
231    case BLEND_SRC_ATOP:
232#if defined(ARCH_ARM_HAVE_NEON)
233        if((x1 + 8) < x2) {
234            uint32_t len = (x2 - x1) >> 3;
235            rsdIntrinsicBlendSrcAtop_K(out, in, len);
236            x1 += len << 3;
237            out += len << 3;
238            in += len << 3;
239        }
240#endif
241        for (;x1 < x2; x1++, out++, in++) {
242            short4 in_s = convert_short4(*in);
243            short4 out_s = convert_short4(*out);
244            out_s.xyz = ((in_s.xyz * out_s.w) +
245              (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
246            *out = convert_uchar4(out_s);
247        }
248        break;
249    case BLEND_DST_ATOP:
250#if defined(ARCH_ARM_HAVE_NEON)
251        if((x1 + 8) < x2) {
252            uint32_t len = (x2 - x1) >> 3;
253            rsdIntrinsicBlendDstAtop_K(out, in, len);
254            x1 += len << 3;
255            out += len << 3;
256            in += len << 3;
257        }
258#endif
259        for (;x1 < x2; x1++, out++, in++) {
260            short4 in_s = convert_short4(*in);
261            short4 out_s = convert_short4(*out);
262            out_s.xyz = ((out_s.xyz * in_s.w) +
263              (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
264            *out = convert_uchar4(out_s);
265        }
266        break;
267    case BLEND_XOR:
268#if defined(ARCH_ARM_HAVE_NEON)
269        if((x1 + 8) < x2) {
270            uint32_t len = (x2 - x1) >> 3;
271            rsdIntrinsicBlendXor_K(out, in, len);
272            x1 += len << 3;
273            out += len << 3;
274            in += len << 3;
275        }
276#endif
277        for (;x1 < x2; x1++, out++, in++) {
278            *out = *in ^ *out;
279        }
280        break;
281    case BLEND_NORMAL:
282        ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
283        rsAssert(false);
284        break;
285    case BLEND_AVERAGE:
286        ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
287        rsAssert(false);
288        break;
289    case BLEND_MULTIPLY:
290#if defined(ARCH_ARM_HAVE_NEON)
291        if((x1 + 8) < x2) {
292            uint32_t len = (x2 - x1) >> 3;
293            rsdIntrinsicBlendMultiply_K(out, in, len);
294            x1 += len << 3;
295            out += len << 3;
296            in += len << 3;
297        }
298#endif
299        for (;x1 < x2; x1++, out++, in++) {
300          *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
301                                >> (short4)8);
302        }
303        break;
304    case BLEND_SCREEN:
305        ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
306        rsAssert(false);
307        break;
308    case BLEND_DARKEN:
309        ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
310        rsAssert(false);
311        break;
312    case BLEND_LIGHTEN:
313        ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
314        rsAssert(false);
315        break;
316    case BLEND_OVERLAY:
317        ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
318        rsAssert(false);
319        break;
320    case BLEND_HARDLIGHT:
321        ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
322        rsAssert(false);
323        break;
324    case BLEND_SOFTLIGHT:
325        ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
326        rsAssert(false);
327        break;
328    case BLEND_DIFFERENCE:
329        ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
330        rsAssert(false);
331        break;
332    case BLEND_NEGATION:
333        ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
334        rsAssert(false);
335        break;
336    case BLEND_EXCLUSION:
337        ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
338        rsAssert(false);
339        break;
340    case BLEND_COLOR_DODGE:
341        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
342        rsAssert(false);
343        break;
344    case BLEND_INVERSE_COLOR_DODGE:
345        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
346        rsAssert(false);
347        break;
348    case BLEND_SOFT_DODGE:
349        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
350        rsAssert(false);
351        break;
352    case BLEND_COLOR_BURN:
353        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
354        rsAssert(false);
355        break;
356    case BLEND_INVERSE_COLOR_BURN:
357        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
358        rsAssert(false);
359        break;
360    case BLEND_SOFT_BURN:
361        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
362        rsAssert(false);
363        break;
364    case BLEND_REFLECT:
365        ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
366        rsAssert(false);
367        break;
368    case BLEND_GLOW:
369        ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
370        rsAssert(false);
371        break;
372    case BLEND_FREEZE:
373        ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
374        rsAssert(false);
375        break;
376    case BLEND_HEAT:
377        ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
378        rsAssert(false);
379        break;
380    case BLEND_ADD:
381#if defined(ARCH_ARM_HAVE_NEON)
382        if((x1 + 8) < x2) {
383            uint32_t len = (x2 - x1) >> 3;
384            rsdIntrinsicBlendAdd_K(out, in, len);
385            x1 += len << 3;
386            out += len << 3;
387            in += len << 3;
388        }
389#endif
390        for (;x1 < x2; x1++, out++, in++) {
391            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
392                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
393            out->x = (oR + iR) > 255 ? 255 : oR + iR;
394            out->y = (oG + iG) > 255 ? 255 : oG + iG;
395            out->z = (oB + iB) > 255 ? 255 : oB + iB;
396            out->w = (oA + iA) > 255 ? 255 : oA + iA;
397        }
398        break;
399    case BLEND_SUBTRACT:
400#if defined(ARCH_ARM_HAVE_NEON)
401        if((x1 + 8) < x2) {
402            uint32_t len = (x2 - x1) >> 3;
403            rsdIntrinsicBlendSub_K(out, in, len);
404            x1 += len << 3;
405            out += len << 3;
406            in += len << 3;
407        }
408#endif
409        for (;x1 < x2; x1++, out++, in++) {
410            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
411                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
412            out->x = (oR - iR) < 0 ? 0 : oR - iR;
413            out->y = (oG - iG) < 0 ? 0 : oG - iG;
414            out->z = (oB - iB) < 0 ? 0 : oB - iB;
415            out->w = (oA - iA) < 0 ? 0 : oA - iA;
416        }
417        break;
418    case BLEND_STAMP:
419        ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
420        rsAssert(false);
421        break;
422    case BLEND_RED:
423        ALOGE("Called unimplemented blend intrinsic BLEND_RED");
424        rsAssert(false);
425        break;
426    case BLEND_GREEN:
427        ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
428        rsAssert(false);
429        break;
430    case BLEND_BLUE:
431        ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
432        rsAssert(false);
433        break;
434    case BLEND_HUE:
435        ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
436        rsAssert(false);
437        break;
438    case BLEND_SATURATION:
439        ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
440        rsAssert(false);
441        break;
442    case BLEND_COLOR:
443        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
444        rsAssert(false);
445        break;
446    case BLEND_LUMINOSITY:
447        ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
448        rsAssert(false);
449        break;
450
451    default:
452        ALOGE("Called unimplemented value %d", p->slot);
453        rsAssert(false);
454
455    }
456}
457
458
459RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
460                                                       const Script *s, const Element *e)
461            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
462
463    mRootPtr = &kernel;
464}
465
466RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
467}
468
469void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
470    s->mHal.info.exportedVariableCount = 0;
471}
472
473RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
474                                      const Script *s, const Element *e) {
475    return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
476}
477
478
479
480