1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29public:
30    void populateScript(Script *) override;
31
32    ~RsdCpuScriptIntrinsicBlend() override;
33    RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36    static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
37                       uint32_t xend, uint32_t outstep);
38};
39
40}
41}
42
43
// Slot numbers that RsdCpuScriptIntrinsicBlend::kernel switches on
// (info->slot). The numeric values are spelled out explicitly because they
// are compared against a value supplied from outside this file.
enum {
    // Slots 0-11: every one of these has a case with a scalar implementation
    // in the kernel (BLEND_DST is a deliberate no-op).
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Slots 12-43: only BLEND_MULTIPLY, BLEND_ADD and BLEND_SUBTRACT are
    // implemented by the kernel; the rest log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
91
#if defined(ARCH_ARM_USE_INTRINSICS)
// ARM-optimized blend routine, defined outside this file. The kernel hands it
// the output/input pointers, the blend slot and the x range; a return value
// >= 0 is treated as "handled", anything negative makes the kernel fall back
// to its scalar implementation.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, const uchar4 *in, int slot,
                                   uint32_t xstart, uint32_t xend);
#endif
96
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines, defined outside this file. They all share one
// signature: dst is blended in place with src, and the kernel passes count8
// as the number of 8-element groups to process (it advances its pointers by
// count8 * 8 uchar4 elements afterwards).
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
111
112void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
113                                        uint32_t xstart, uint32_t xend,
114                                        uint32_t outstep) {
115    RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
116
117    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
118    uchar4 *out = (uchar4 *)info->outPtr[0];
119    uchar4 *in = (uchar4 *)info->inPtr[0];
120    uint32_t x1 = xstart;
121    uint32_t x2 = xend;
122
123#if defined(ARCH_ARM_USE_INTRINSICS)
124    // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
125    // been fixed.
126    if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
127        if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
128            return;
129    }
130#endif
131    switch (info->slot) {
132    case BLEND_CLEAR:
133        for (;x1 < x2; x1++, out++) {
134            *out = 0;
135        }
136        break;
137    case BLEND_SRC:
138        for (;x1 < x2; x1++, out++, in++) {
139          *out = *in;
140        }
141        break;
142    //BLEND_DST is a NOP
143    case BLEND_DST:
144        break;
145    case BLEND_SRC_OVER:
146    #if defined(ARCH_X86_HAVE_SSSE3)
147        if (gArchUseSIMD) {
148            if ((x1 + 8) < x2) {
149                uint32_t len = (x2 - x1) >> 3;
150                rsdIntrinsicBlendSrcOver_K(out, in, len);
151                x1 += len << 3;
152                out += len << 3;
153                in += len << 3;
154            }
155        }
156    #endif
157        for (;x1 < x2; x1++, out++, in++) {
158            short4 in_s = convert_short4(*in);
159            short4 out_s = convert_short4(*out);
160            in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
161            *out = convert_uchar4(in_s);
162        }
163        break;
164    case BLEND_DST_OVER:
165    #if defined(ARCH_X86_HAVE_SSSE3)
166        if (gArchUseSIMD) {
167            if ((x1 + 8) < x2) {
168                uint32_t len = (x2 - x1) >> 3;
169                rsdIntrinsicBlendDstOver_K(out, in, len);
170                x1 += len << 3;
171                out += len << 3;
172                in += len << 3;
173            }
174        }
175     #endif
176        for (;x1 < x2; x1++, out++, in++) {
177            short4 in_s = convert_short4(*in);
178            short4 out_s = convert_short4(*out);
179            in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
180            *out = convert_uchar4(in_s);
181        }
182        break;
183    case BLEND_SRC_IN:
184    #if defined(ARCH_X86_HAVE_SSSE3)
185        if (gArchUseSIMD) {
186            if ((x1 + 8) < x2) {
187                uint32_t len = (x2 - x1) >> 3;
188                rsdIntrinsicBlendSrcIn_K(out, in, len);
189                x1 += len << 3;
190                out += len << 3;
191                in += len << 3;
192            }
193        }
194    #endif
195        for (;x1 < x2; x1++, out++, in++) {
196            short4 in_s = convert_short4(*in);
197            in_s = (in_s * out->w) >> (short4)8;
198            *out = convert_uchar4(in_s);
199        }
200        break;
201    case BLEND_DST_IN:
202    #if defined(ARCH_X86_HAVE_SSSE3)
203        if (gArchUseSIMD) {
204            if ((x1 + 8) < x2) {
205                uint32_t len = (x2 - x1) >> 3;
206                rsdIntrinsicBlendDstIn_K(out, in, len);
207                x1 += len << 3;
208                out += len << 3;
209                in += len << 3;
210            }
211        }
212     #endif
213        for (;x1 < x2; x1++, out++, in++) {
214            short4 out_s = convert_short4(*out);
215            out_s = (out_s * in->w) >> (short4)8;
216            *out = convert_uchar4(out_s);
217        }
218        break;
219    case BLEND_SRC_OUT:
220    #if defined(ARCH_X86_HAVE_SSSE3)
221        if (gArchUseSIMD) {
222            if ((x1 + 8) < x2) {
223                uint32_t len = (x2 - x1) >> 3;
224                rsdIntrinsicBlendSrcOut_K(out, in, len);
225                x1 += len << 3;
226                out += len << 3;
227                in += len << 3;
228            }
229        }
230    #endif
231        for (;x1 < x2; x1++, out++, in++) {
232            short4 in_s = convert_short4(*in);
233            in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
234            *out = convert_uchar4(in_s);
235        }
236        break;
237    case BLEND_DST_OUT:
238    #if defined(ARCH_X86_HAVE_SSSE3)
239        if (gArchUseSIMD) {
240            if ((x1 + 8) < x2) {
241                uint32_t len = (x2 - x1) >> 3;
242                rsdIntrinsicBlendDstOut_K(out, in, len);
243                x1 += len << 3;
244                out += len << 3;
245                in += len << 3;
246            }
247        }
248    #endif
249        for (;x1 < x2; x1++, out++, in++) {
250            short4 out_s = convert_short4(*out);
251            out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
252            *out = convert_uchar4(out_s);
253        }
254        break;
255    case BLEND_SRC_ATOP:
256    #if defined(ARCH_X86_HAVE_SSSE3)
257        if (gArchUseSIMD) {
258            if ((x1 + 8) < x2) {
259                uint32_t len = (x2 - x1) >> 3;
260                rsdIntrinsicBlendSrcAtop_K(out, in, len);
261                x1 += len << 3;
262                out += len << 3;
263                in += len << 3;
264            }
265        }
266    #endif
267        for (;x1 < x2; x1++, out++, in++) {
268            short4 in_s = convert_short4(*in);
269            short4 out_s = convert_short4(*out);
270            out_s.xyz = ((in_s.xyz * out_s.w) +
271              (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
272            *out = convert_uchar4(out_s);
273        }
274        break;
275    case BLEND_DST_ATOP:
276    #if defined(ARCH_X86_HAVE_SSSE3)
277        if (gArchUseSIMD) {
278            if ((x1 + 8) < x2) {
279                uint32_t len = (x2 - x1) >> 3;
280                rsdIntrinsicBlendDstAtop_K(out, in, len);
281                x1 += len << 3;
282                out += len << 3;
283                in += len << 3;
284            }
285        }
286     #endif
287        for (;x1 < x2; x1++, out++, in++) {
288            short4 in_s = convert_short4(*in);
289            short4 out_s = convert_short4(*out);
290            out_s.xyz = ((out_s.xyz * in_s.w) +
291              (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
292            out_s.w = in_s.w;
293            *out = convert_uchar4(out_s);
294        }
295        break;
296    case BLEND_XOR:
297    #if defined(ARCH_X86_HAVE_SSSE3)
298        if (gArchUseSIMD) {
299            if ((x1 + 8) < x2) {
300                uint32_t len = (x2 - x1) >> 3;
301                rsdIntrinsicBlendXor_K(out, in, len);
302                x1 += len << 3;
303                out += len << 3;
304                in += len << 3;
305            }
306        }
307    #endif
308        for (;x1 < x2; x1++, out++, in++) {
309            *out = *in ^ *out;
310        }
311        break;
312    case BLEND_NORMAL:
313        ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
314        rsAssert(false);
315        break;
316    case BLEND_AVERAGE:
317        ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
318        rsAssert(false);
319        break;
320    case BLEND_MULTIPLY:
321    #if defined(ARCH_X86_HAVE_SSSE3)
322        if (gArchUseSIMD) {
323            if ((x1 + 8) < x2) {
324                uint32_t len = (x2 - x1) >> 3;
325                rsdIntrinsicBlendMultiply_K(out, in, len);
326                x1 += len << 3;
327                out += len << 3;
328                in += len << 3;
329            }
330        }
331    #endif
332        for (;x1 < x2; x1++, out++, in++) {
333          *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
334                                >> (short4)8);
335        }
336        break;
337    case BLEND_SCREEN:
338        ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
339        rsAssert(false);
340        break;
341    case BLEND_DARKEN:
342        ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
343        rsAssert(false);
344        break;
345    case BLEND_LIGHTEN:
346        ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
347        rsAssert(false);
348        break;
349    case BLEND_OVERLAY:
350        ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
351        rsAssert(false);
352        break;
353    case BLEND_HARDLIGHT:
354        ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
355        rsAssert(false);
356        break;
357    case BLEND_SOFTLIGHT:
358        ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
359        rsAssert(false);
360        break;
361    case BLEND_DIFFERENCE:
362        ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
363        rsAssert(false);
364        break;
365    case BLEND_NEGATION:
366        ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
367        rsAssert(false);
368        break;
369    case BLEND_EXCLUSION:
370        ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
371        rsAssert(false);
372        break;
373    case BLEND_COLOR_DODGE:
374        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
375        rsAssert(false);
376        break;
377    case BLEND_INVERSE_COLOR_DODGE:
378        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
379        rsAssert(false);
380        break;
381    case BLEND_SOFT_DODGE:
382        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
383        rsAssert(false);
384        break;
385    case BLEND_COLOR_BURN:
386        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
387        rsAssert(false);
388        break;
389    case BLEND_INVERSE_COLOR_BURN:
390        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
391        rsAssert(false);
392        break;
393    case BLEND_SOFT_BURN:
394        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
395        rsAssert(false);
396        break;
397    case BLEND_REFLECT:
398        ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
399        rsAssert(false);
400        break;
401    case BLEND_GLOW:
402        ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
403        rsAssert(false);
404        break;
405    case BLEND_FREEZE:
406        ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
407        rsAssert(false);
408        break;
409    case BLEND_HEAT:
410        ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
411        rsAssert(false);
412        break;
413    case BLEND_ADD:
414    #if defined(ARCH_X86_HAVE_SSSE3)
415        if (gArchUseSIMD) {
416            if((x1 + 8) < x2) {
417                uint32_t len = (x2 - x1) >> 3;
418                rsdIntrinsicBlendAdd_K(out, in, len);
419                x1 += len << 3;
420                out += len << 3;
421                in += len << 3;
422            }
423        }
424    #endif
425        for (;x1 < x2; x1++, out++, in++) {
426            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
427                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
428            out->x = (oR + iR) > 255 ? 255 : oR + iR;
429            out->y = (oG + iG) > 255 ? 255 : oG + iG;
430            out->z = (oB + iB) > 255 ? 255 : oB + iB;
431            out->w = (oA + iA) > 255 ? 255 : oA + iA;
432        }
433        break;
434    case BLEND_SUBTRACT:
435    #if defined(ARCH_X86_HAVE_SSSE3)
436        if (gArchUseSIMD) {
437            if((x1 + 8) < x2) {
438                uint32_t len = (x2 - x1) >> 3;
439                rsdIntrinsicBlendSub_K(out, in, len);
440                x1 += len << 3;
441                out += len << 3;
442                in += len << 3;
443            }
444        }
445    #endif
446        for (;x1 < x2; x1++, out++, in++) {
447            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
448                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
449            out->x = (oR - iR) < 0 ? 0 : oR - iR;
450            out->y = (oG - iG) < 0 ? 0 : oG - iG;
451            out->z = (oB - iB) < 0 ? 0 : oB - iB;
452            out->w = (oA - iA) < 0 ? 0 : oA - iA;
453        }
454        break;
455    case BLEND_STAMP:
456        ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
457        rsAssert(false);
458        break;
459    case BLEND_RED:
460        ALOGE("Called unimplemented blend intrinsic BLEND_RED");
461        rsAssert(false);
462        break;
463    case BLEND_GREEN:
464        ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
465        rsAssert(false);
466        break;
467    case BLEND_BLUE:
468        ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
469        rsAssert(false);
470        break;
471    case BLEND_HUE:
472        ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
473        rsAssert(false);
474        break;
475    case BLEND_SATURATION:
476        ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
477        rsAssert(false);
478        break;
479    case BLEND_COLOR:
480        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
481        rsAssert(false);
482        break;
483    case BLEND_LUMINOSITY:
484        ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
485        rsAssert(false);
486        break;
487
488    default:
489        ALOGE("Called unimplemented value %d", info->slot);
490        rsAssert(false);
491
492    }
493}
494
495
496RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
497                                                       const Script *s, const Element *e)
498            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
499
500    mRootPtr = &kernel;
501}
502
503RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
504}
505
506void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
507    s->mHal.info.exportedVariableCount = 0;
508}
509
510RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
511                                      const Script *s, const Element *e) {
512    return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
513}
514