1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21namespace android {
22namespace renderscript {
23
24
25class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
26public:
27    void populateScript(Script *) override;
28
29    ~RsdCpuScriptIntrinsicBlend() override;
30    RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
31
32protected:
33    static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
34                       uint32_t xend, uint32_t outstep);
35};
36
37} // namespace renderscript
38} // namespace android
39
40
// Blend mode identifiers. The kernel in this file switches on these values
// (via info->slot), so the numeric values must not change.
enum {
    // Compositing modes; all of these have an implementation in the kernel.
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Further modes. Of these, only BLEND_MULTIPLY, BLEND_ADD and
    // BLEND_SUBTRACT are implemented by the kernel; the rest log an error.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
88
#if defined(ARCH_ARM_USE_INTRINSICS)
// Externally defined (C linkage) blend routine. The kernel in this file calls
// it when gArchUseSIMD is set and treats a non-negative return value as
// "span handled"; a negative return makes the kernel fall back to its own
// scalar implementation of the requested slot.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, const uchar4 *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif  // ARCH_ARM_USE_INTRINSICS
93
#if defined(ARCH_X86_HAVE_SSSE3)
// Externally defined per-mode blend routines. The kernel in this file passes
// count8 as the number of 8-pixel groups to process and afterwards advances
// its own pointers by count8 * 8 pixels.
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif  // ARCH_X86_HAVE_SSSE3
108
109namespace android {
110namespace renderscript {
111
112void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113                                        uint32_t xstart, uint32_t xend,
114                                        uint32_t outstep) {
115    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
116    uchar4 *out = (uchar4 *)info->outPtr[0];
117    uchar4 *in = (uchar4 *)info->inPtr[0];
118    uint32_t x1 = xstart;
119    uint32_t x2 = xend;
120
121#if defined(ARCH_ARM_USE_INTRINSICS)
122    if (gArchUseSIMD) {
123        if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
124            return;
125    }
126#endif
127    switch (info->slot) {
128    case BLEND_CLEAR:
129        for (;x1 < x2; x1++, out++) {
130            *out = 0;
131        }
132        break;
133    case BLEND_SRC:
134        for (;x1 < x2; x1++, out++, in++) {
135          *out = *in;
136        }
137        break;
138    //BLEND_DST is a NOP
139    case BLEND_DST:
140        break;
141    case BLEND_SRC_OVER:
142    #if defined(ARCH_X86_HAVE_SSSE3)
143        if (gArchUseSIMD) {
144            if ((x1 + 8) < x2) {
145                uint32_t len = (x2 - x1) >> 3;
146                rsdIntrinsicBlendSrcOver_K(out, in, len);
147                x1 += len << 3;
148                out += len << 3;
149                in += len << 3;
150            }
151        }
152    #endif
153        for (;x1 < x2; x1++, out++, in++) {
154            short4 in_s = convert_short4(*in);
155            short4 out_s = convert_short4(*out);
156            in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
157            *out = convert_uchar4(in_s);
158        }
159        break;
160    case BLEND_DST_OVER:
161    #if defined(ARCH_X86_HAVE_SSSE3)
162        if (gArchUseSIMD) {
163            if ((x1 + 8) < x2) {
164                uint32_t len = (x2 - x1) >> 3;
165                rsdIntrinsicBlendDstOver_K(out, in, len);
166                x1 += len << 3;
167                out += len << 3;
168                in += len << 3;
169            }
170        }
171     #endif
172        for (;x1 < x2; x1++, out++, in++) {
173            short4 in_s = convert_short4(*in);
174            short4 out_s = convert_short4(*out);
175            in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
176            *out = convert_uchar4(in_s);
177        }
178        break;
179    case BLEND_SRC_IN:
180    #if defined(ARCH_X86_HAVE_SSSE3)
181        if (gArchUseSIMD) {
182            if ((x1 + 8) < x2) {
183                uint32_t len = (x2 - x1) >> 3;
184                rsdIntrinsicBlendSrcIn_K(out, in, len);
185                x1 += len << 3;
186                out += len << 3;
187                in += len << 3;
188            }
189        }
190    #endif
191        for (;x1 < x2; x1++, out++, in++) {
192            short4 in_s = convert_short4(*in);
193            in_s = (in_s * out->w) >> (short4)8;
194            *out = convert_uchar4(in_s);
195        }
196        break;
197    case BLEND_DST_IN:
198    #if defined(ARCH_X86_HAVE_SSSE3)
199        if (gArchUseSIMD) {
200            if ((x1 + 8) < x2) {
201                uint32_t len = (x2 - x1) >> 3;
202                rsdIntrinsicBlendDstIn_K(out, in, len);
203                x1 += len << 3;
204                out += len << 3;
205                in += len << 3;
206            }
207        }
208     #endif
209        for (;x1 < x2; x1++, out++, in++) {
210            short4 out_s = convert_short4(*out);
211            out_s = (out_s * in->w) >> (short4)8;
212            *out = convert_uchar4(out_s);
213        }
214        break;
215    case BLEND_SRC_OUT:
216    #if defined(ARCH_X86_HAVE_SSSE3)
217        if (gArchUseSIMD) {
218            if ((x1 + 8) < x2) {
219                uint32_t len = (x2 - x1) >> 3;
220                rsdIntrinsicBlendSrcOut_K(out, in, len);
221                x1 += len << 3;
222                out += len << 3;
223                in += len << 3;
224            }
225        }
226    #endif
227        for (;x1 < x2; x1++, out++, in++) {
228            short4 in_s = convert_short4(*in);
229            in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
230            *out = convert_uchar4(in_s);
231        }
232        break;
233    case BLEND_DST_OUT:
234    #if defined(ARCH_X86_HAVE_SSSE3)
235        if (gArchUseSIMD) {
236            if ((x1 + 8) < x2) {
237                uint32_t len = (x2 - x1) >> 3;
238                rsdIntrinsicBlendDstOut_K(out, in, len);
239                x1 += len << 3;
240                out += len << 3;
241                in += len << 3;
242            }
243        }
244    #endif
245        for (;x1 < x2; x1++, out++, in++) {
246            short4 out_s = convert_short4(*out);
247            out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
248            *out = convert_uchar4(out_s);
249        }
250        break;
251    case BLEND_SRC_ATOP:
252    #if defined(ARCH_X86_HAVE_SSSE3)
253        if (gArchUseSIMD) {
254            if ((x1 + 8) < x2) {
255                uint32_t len = (x2 - x1) >> 3;
256                rsdIntrinsicBlendSrcAtop_K(out, in, len);
257                x1 += len << 3;
258                out += len << 3;
259                in += len << 3;
260            }
261        }
262    #endif
263        for (;x1 < x2; x1++, out++, in++) {
264            short4 in_s = convert_short4(*in);
265            short4 out_s = convert_short4(*out);
266            out_s.xyz = ((in_s.xyz * out_s.w) +
267              (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
268            *out = convert_uchar4(out_s);
269        }
270        break;
271    case BLEND_DST_ATOP:
272    #if defined(ARCH_X86_HAVE_SSSE3)
273        if (gArchUseSIMD) {
274            if ((x1 + 8) < x2) {
275                uint32_t len = (x2 - x1) >> 3;
276                rsdIntrinsicBlendDstAtop_K(out, in, len);
277                x1 += len << 3;
278                out += len << 3;
279                in += len << 3;
280            }
281        }
282     #endif
283        for (;x1 < x2; x1++, out++, in++) {
284            short4 in_s = convert_short4(*in);
285            short4 out_s = convert_short4(*out);
286            out_s.xyz = ((out_s.xyz * in_s.w) +
287              (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
288            out_s.w = in_s.w;
289            *out = convert_uchar4(out_s);
290        }
291        break;
292    case BLEND_XOR:
293    #if defined(ARCH_X86_HAVE_SSSE3)
294        if (gArchUseSIMD) {
295            if ((x1 + 8) < x2) {
296                uint32_t len = (x2 - x1) >> 3;
297                rsdIntrinsicBlendXor_K(out, in, len);
298                x1 += len << 3;
299                out += len << 3;
300                in += len << 3;
301            }
302        }
303    #endif
304        for (;x1 < x2; x1++, out++, in++) {
305            *out = *in ^ *out;
306        }
307        break;
308    case BLEND_NORMAL:
309        ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
310        rsAssert(false);
311        break;
312    case BLEND_AVERAGE:
313        ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
314        rsAssert(false);
315        break;
316    case BLEND_MULTIPLY:
317    #if defined(ARCH_X86_HAVE_SSSE3)
318        if (gArchUseSIMD) {
319            if ((x1 + 8) < x2) {
320                uint32_t len = (x2 - x1) >> 3;
321                rsdIntrinsicBlendMultiply_K(out, in, len);
322                x1 += len << 3;
323                out += len << 3;
324                in += len << 3;
325            }
326        }
327    #endif
328        for (;x1 < x2; x1++, out++, in++) {
329          *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
330                                >> (short4)8);
331        }
332        break;
333    case BLEND_SCREEN:
334        ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
335        rsAssert(false);
336        break;
337    case BLEND_DARKEN:
338        ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
339        rsAssert(false);
340        break;
341    case BLEND_LIGHTEN:
342        ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
343        rsAssert(false);
344        break;
345    case BLEND_OVERLAY:
346        ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
347        rsAssert(false);
348        break;
349    case BLEND_HARDLIGHT:
350        ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
351        rsAssert(false);
352        break;
353    case BLEND_SOFTLIGHT:
354        ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
355        rsAssert(false);
356        break;
357    case BLEND_DIFFERENCE:
358        ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
359        rsAssert(false);
360        break;
361    case BLEND_NEGATION:
362        ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
363        rsAssert(false);
364        break;
365    case BLEND_EXCLUSION:
366        ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
367        rsAssert(false);
368        break;
369    case BLEND_COLOR_DODGE:
370        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
371        rsAssert(false);
372        break;
373    case BLEND_INVERSE_COLOR_DODGE:
374        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
375        rsAssert(false);
376        break;
377    case BLEND_SOFT_DODGE:
378        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
379        rsAssert(false);
380        break;
381    case BLEND_COLOR_BURN:
382        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
383        rsAssert(false);
384        break;
385    case BLEND_INVERSE_COLOR_BURN:
386        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
387        rsAssert(false);
388        break;
389    case BLEND_SOFT_BURN:
390        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
391        rsAssert(false);
392        break;
393    case BLEND_REFLECT:
394        ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
395        rsAssert(false);
396        break;
397    case BLEND_GLOW:
398        ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
399        rsAssert(false);
400        break;
401    case BLEND_FREEZE:
402        ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
403        rsAssert(false);
404        break;
405    case BLEND_HEAT:
406        ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
407        rsAssert(false);
408        break;
409    case BLEND_ADD:
410    #if defined(ARCH_X86_HAVE_SSSE3)
411        if (gArchUseSIMD) {
412            if((x1 + 8) < x2) {
413                uint32_t len = (x2 - x1) >> 3;
414                rsdIntrinsicBlendAdd_K(out, in, len);
415                x1 += len << 3;
416                out += len << 3;
417                in += len << 3;
418            }
419        }
420    #endif
421        for (;x1 < x2; x1++, out++, in++) {
422            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
423                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
424            out->x = (oR + iR) > 255 ? 255 : oR + iR;
425            out->y = (oG + iG) > 255 ? 255 : oG + iG;
426            out->z = (oB + iB) > 255 ? 255 : oB + iB;
427            out->w = (oA + iA) > 255 ? 255 : oA + iA;
428        }
429        break;
430    case BLEND_SUBTRACT:
431    #if defined(ARCH_X86_HAVE_SSSE3)
432        if (gArchUseSIMD) {
433            if((x1 + 8) < x2) {
434                uint32_t len = (x2 - x1) >> 3;
435                rsdIntrinsicBlendSub_K(out, in, len);
436                x1 += len << 3;
437                out += len << 3;
438                in += len << 3;
439            }
440        }
441    #endif
442        for (;x1 < x2; x1++, out++, in++) {
443            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
444                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
445            out->x = (oR - iR) < 0 ? 0 : oR - iR;
446            out->y = (oG - iG) < 0 ? 0 : oG - iG;
447            out->z = (oB - iB) < 0 ? 0 : oB - iB;
448            out->w = (oA - iA) < 0 ? 0 : oA - iA;
449        }
450        break;
451    case BLEND_STAMP:
452        ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
453        rsAssert(false);
454        break;
455    case BLEND_RED:
456        ALOGE("Called unimplemented blend intrinsic BLEND_RED");
457        rsAssert(false);
458        break;
459    case BLEND_GREEN:
460        ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
461        rsAssert(false);
462        break;
463    case BLEND_BLUE:
464        ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
465        rsAssert(false);
466        break;
467    case BLEND_HUE:
468        ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
469        rsAssert(false);
470        break;
471    case BLEND_SATURATION:
472        ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
473        rsAssert(false);
474        break;
475    case BLEND_COLOR:
476        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
477        rsAssert(false);
478        break;
479    case BLEND_LUMINOSITY:
480        ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
481        rsAssert(false);
482        break;
483
484    default:
485        ALOGE("Called unimplemented value %d", info->slot);
486        rsAssert(false);
487
488    }
489}
490
491
492RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
493                                                       const Script *s, const Element *e)
494            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
495
496    mRootPtr = &kernel;
497}
498
499RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
500}
501
502void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
503    s->mHal.info.exportedVariableCount = 0;
504}
505
506RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
507                                      const Script *s, const Element *e) {
508    return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
509}
510
511} // namespace renderscript
512} // namespace android
513