1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29public:
30    virtual void populateScript(Script *);
31
32    virtual ~RsdCpuScriptIntrinsicBlend();
33    RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36    static void kernel(const RsForEachStubParamStruct *p,
37                          uint32_t xstart, uint32_t xend,
38                          uint32_t instep, uint32_t outstep);
39};
40
41}
42}
43
44
// Slot numbers understood by RsdCpuScriptIntrinsicBlend::kernel().
// The numeric values are compared against p->slot, so they must stay fixed.
// Entries marked "unimplemented" only log an error and assert in kernel().
enum {
    // Slots 0-11: all handled by kernel().
    BLEND_CLEAR               = 0,   // out = 0
    BLEND_SRC                 = 1,   // out = in
    BLEND_DST                 = 2,   // no-op
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,  // bitwise in ^ out

    // Slots 12-43: only MULTIPLY, ADD and SUBTRACT are handled by kernel().
    BLEND_NORMAL              = 12,  // unimplemented
    BLEND_AVERAGE             = 13,  // unimplemented
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,  // unimplemented
    BLEND_DARKEN              = 16,  // unimplemented
    BLEND_LIGHTEN             = 17,  // unimplemented
    BLEND_OVERLAY             = 18,  // unimplemented
    BLEND_HARDLIGHT           = 19,  // unimplemented
    BLEND_SOFTLIGHT           = 20,  // unimplemented
    BLEND_DIFFERENCE          = 21,  // unimplemented
    BLEND_NEGATION            = 22,  // unimplemented
    BLEND_EXCLUSION           = 23,  // unimplemented
    BLEND_COLOR_DODGE         = 24,  // unimplemented
    BLEND_INVERSE_COLOR_DODGE = 25,  // unimplemented
    BLEND_SOFT_DODGE          = 26,  // unimplemented
    BLEND_COLOR_BURN          = 27,  // unimplemented
    BLEND_INVERSE_COLOR_BURN  = 28,  // unimplemented
    BLEND_SOFT_BURN           = 29,  // unimplemented
    BLEND_REFLECT             = 30,  // unimplemented
    BLEND_GLOW                = 31,  // unimplemented
    BLEND_FREEZE              = 32,  // unimplemented
    BLEND_HEAT                = 33,  // unimplemented
    BLEND_ADD                 = 34,  // saturating out + in
    BLEND_SUBTRACT            = 35,  // saturating out - in
    BLEND_STAMP               = 36,  // unimplemented
    BLEND_RED                 = 37,  // unimplemented
    BLEND_GREEN               = 38,  // unimplemented
    BLEND_BLUE                = 39,  // unimplemented
    BLEND_HUE                 = 40,  // unimplemented
    BLEND_SATURATION          = 41,  // unimplemented
    BLEND_COLOR               = 42,  // unimplemented
    BLEND_LUMINOSITY          = 43   // unimplemented
};
92
#if defined(ARCH_ARM_USE_INTRINSICS)
// ARM blend routine, defined outside this file. kernel() treats a negative
// return value as "slot not handled" and falls back to its C++ loops.
extern "C" {
int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                        uint32_t xstart, uint32_t xend);
}
#endif
97
#if defined(ARCH_X86_HAVE_SSSE3)
// SSSE3 blend routines, defined outside this file. kernel() passes count8 as
// the number of whole 8-pixel groups to blend from src into dst.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}
#endif
112
113void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
114                                        uint32_t xstart, uint32_t xend,
115                                        uint32_t instep, uint32_t outstep) {
116    RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
117
118    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
119    uchar4 *out = (uchar4 *)p->out;
120    uchar4 *in = (uchar4 *)p->in;
121    uint32_t x1 = xstart;
122    uint32_t x2 = xend;
123
124#if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
125    if (gArchUseSIMD) {
126        if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
127            return;
128    }
129#endif
130    switch (p->slot) {
131    case BLEND_CLEAR:
132        for (;x1 < x2; x1++, out++) {
133            *out = 0;
134        }
135        break;
136    case BLEND_SRC:
137        for (;x1 < x2; x1++, out++, in++) {
138          *out = *in;
139        }
140        break;
141    //BLEND_DST is a NOP
142    case BLEND_DST:
143        break;
144    case BLEND_SRC_OVER:
145    #if defined(ARCH_X86_HAVE_SSSE3)
146        if (gArchUseSIMD) {
147            if ((x1 + 8) < x2) {
148                uint32_t len = (x2 - x1) >> 3;
149                rsdIntrinsicBlendSrcOver_K(out, in, len);
150                x1 += len << 3;
151                out += len << 3;
152                in += len << 3;
153            }
154        }
155    #endif
156        for (;x1 < x2; x1++, out++, in++) {
157            short4 in_s = convert_short4(*in);
158            short4 out_s = convert_short4(*out);
159            in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
160            *out = convert_uchar4(in_s);
161        }
162        break;
163    case BLEND_DST_OVER:
164    #if defined(ARCH_X86_HAVE_SSSE3)
165        if (gArchUseSIMD) {
166            if ((x1 + 8) < x2) {
167                uint32_t len = (x2 - x1) >> 3;
168                rsdIntrinsicBlendDstOver_K(out, in, len);
169                x1 += len << 3;
170                out += len << 3;
171                in += len << 3;
172            }
173        }
174     #endif
175        for (;x1 < x2; x1++, out++, in++) {
176            short4 in_s = convert_short4(*in);
177            short4 out_s = convert_short4(*out);
178            in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
179            *out = convert_uchar4(in_s);
180        }
181        break;
182    case BLEND_SRC_IN:
183    #if defined(ARCH_X86_HAVE_SSSE3)
184        if (gArchUseSIMD) {
185            if ((x1 + 8) < x2) {
186                uint32_t len = (x2 - x1) >> 3;
187                rsdIntrinsicBlendSrcIn_K(out, in, len);
188                x1 += len << 3;
189                out += len << 3;
190                in += len << 3;
191            }
192        }
193    #endif
194        for (;x1 < x2; x1++, out++, in++) {
195            short4 in_s = convert_short4(*in);
196            in_s = (in_s * out->w) >> (short4)8;
197            *out = convert_uchar4(in_s);
198        }
199        break;
200    case BLEND_DST_IN:
201    #if defined(ARCH_X86_HAVE_SSSE3)
202        if (gArchUseSIMD) {
203            if ((x1 + 8) < x2) {
204                uint32_t len = (x2 - x1) >> 3;
205                rsdIntrinsicBlendDstIn_K(out, in, len);
206                x1 += len << 3;
207                out += len << 3;
208                in += len << 3;
209            }
210        }
211     #endif
212        for (;x1 < x2; x1++, out++, in++) {
213            short4 out_s = convert_short4(*out);
214            out_s = (out_s * in->w) >> (short4)8;
215            *out = convert_uchar4(out_s);
216        }
217        break;
218    case BLEND_SRC_OUT:
219    #if defined(ARCH_X86_HAVE_SSSE3)
220        if (gArchUseSIMD) {
221            if ((x1 + 8) < x2) {
222                uint32_t len = (x2 - x1) >> 3;
223                rsdIntrinsicBlendSrcOut_K(out, in, len);
224                x1 += len << 3;
225                out += len << 3;
226                in += len << 3;
227            }
228        }
229    #endif
230        for (;x1 < x2; x1++, out++, in++) {
231            short4 in_s = convert_short4(*in);
232            in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
233            *out = convert_uchar4(in_s);
234        }
235        break;
236    case BLEND_DST_OUT:
237    #if defined(ARCH_X86_HAVE_SSSE3)
238        if (gArchUseSIMD) {
239            if ((x1 + 8) < x2) {
240                uint32_t len = (x2 - x1) >> 3;
241                rsdIntrinsicBlendDstOut_K(out, in, len);
242                x1 += len << 3;
243                out += len << 3;
244                in += len << 3;
245            }
246        }
247    #endif
248        for (;x1 < x2; x1++, out++, in++) {
249            short4 out_s = convert_short4(*out);
250            out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
251            *out = convert_uchar4(out_s);
252        }
253        break;
254    case BLEND_SRC_ATOP:
255    #if defined(ARCH_X86_HAVE_SSSE3)
256        if (gArchUseSIMD) {
257            if ((x1 + 8) < x2) {
258                uint32_t len = (x2 - x1) >> 3;
259                rsdIntrinsicBlendSrcAtop_K(out, in, len);
260                x1 += len << 3;
261                out += len << 3;
262                in += len << 3;
263            }
264        }
265    #endif
266        for (;x1 < x2; x1++, out++, in++) {
267            short4 in_s = convert_short4(*in);
268            short4 out_s = convert_short4(*out);
269            out_s.xyz = ((in_s.xyz * out_s.w) +
270              (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
271            *out = convert_uchar4(out_s);
272        }
273        break;
274    case BLEND_DST_ATOP:
275    #if defined(ARCH_X86_HAVE_SSSE3)
276        if (gArchUseSIMD) {
277            if ((x1 + 8) < x2) {
278                uint32_t len = (x2 - x1) >> 3;
279                rsdIntrinsicBlendDstAtop_K(out, in, len);
280                x1 += len << 3;
281                out += len << 3;
282                in += len << 3;
283            }
284        }
285     #endif
286        for (;x1 < x2; x1++, out++, in++) {
287            short4 in_s = convert_short4(*in);
288            short4 out_s = convert_short4(*out);
289            out_s.xyz = ((out_s.xyz * in_s.w) +
290              (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
291            *out = convert_uchar4(out_s);
292        }
293        break;
294    case BLEND_XOR:
295    #if defined(ARCH_X86_HAVE_SSSE3)
296        if (gArchUseSIMD) {
297            if ((x1 + 8) < x2) {
298                uint32_t len = (x2 - x1) >> 3;
299                rsdIntrinsicBlendXor_K(out, in, len);
300                x1 += len << 3;
301                out += len << 3;
302                in += len << 3;
303            }
304        }
305    #endif
306        for (;x1 < x2; x1++, out++, in++) {
307            *out = *in ^ *out;
308        }
309        break;
310    case BLEND_NORMAL:
311        ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
312        rsAssert(false);
313        break;
314    case BLEND_AVERAGE:
315        ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
316        rsAssert(false);
317        break;
318    case BLEND_MULTIPLY:
319    #if defined(ARCH_X86_HAVE_SSSE3)
320        if (gArchUseSIMD) {
321            if ((x1 + 8) < x2) {
322                uint32_t len = (x2 - x1) >> 3;
323                rsdIntrinsicBlendMultiply_K(out, in, len);
324                x1 += len << 3;
325                out += len << 3;
326                in += len << 3;
327            }
328        }
329    #endif
330        for (;x1 < x2; x1++, out++, in++) {
331          *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
332                                >> (short4)8);
333        }
334        break;
335    case BLEND_SCREEN:
336        ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
337        rsAssert(false);
338        break;
339    case BLEND_DARKEN:
340        ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
341        rsAssert(false);
342        break;
343    case BLEND_LIGHTEN:
344        ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
345        rsAssert(false);
346        break;
347    case BLEND_OVERLAY:
348        ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
349        rsAssert(false);
350        break;
351    case BLEND_HARDLIGHT:
352        ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
353        rsAssert(false);
354        break;
355    case BLEND_SOFTLIGHT:
356        ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
357        rsAssert(false);
358        break;
359    case BLEND_DIFFERENCE:
360        ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
361        rsAssert(false);
362        break;
363    case BLEND_NEGATION:
364        ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
365        rsAssert(false);
366        break;
367    case BLEND_EXCLUSION:
368        ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
369        rsAssert(false);
370        break;
371    case BLEND_COLOR_DODGE:
372        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
373        rsAssert(false);
374        break;
375    case BLEND_INVERSE_COLOR_DODGE:
376        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
377        rsAssert(false);
378        break;
379    case BLEND_SOFT_DODGE:
380        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
381        rsAssert(false);
382        break;
383    case BLEND_COLOR_BURN:
384        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
385        rsAssert(false);
386        break;
387    case BLEND_INVERSE_COLOR_BURN:
388        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
389        rsAssert(false);
390        break;
391    case BLEND_SOFT_BURN:
392        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
393        rsAssert(false);
394        break;
395    case BLEND_REFLECT:
396        ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
397        rsAssert(false);
398        break;
399    case BLEND_GLOW:
400        ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
401        rsAssert(false);
402        break;
403    case BLEND_FREEZE:
404        ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
405        rsAssert(false);
406        break;
407    case BLEND_HEAT:
408        ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
409        rsAssert(false);
410        break;
411    case BLEND_ADD:
412    #if defined(ARCH_X86_HAVE_SSSE3)
413        if (gArchUseSIMD) {
414            if((x1 + 8) < x2) {
415                uint32_t len = (x2 - x1) >> 3;
416                rsdIntrinsicBlendAdd_K(out, in, len);
417                x1 += len << 3;
418                out += len << 3;
419                in += len << 3;
420            }
421        }
422    #endif
423        for (;x1 < x2; x1++, out++, in++) {
424            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
425                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
426            out->x = (oR + iR) > 255 ? 255 : oR + iR;
427            out->y = (oG + iG) > 255 ? 255 : oG + iG;
428            out->z = (oB + iB) > 255 ? 255 : oB + iB;
429            out->w = (oA + iA) > 255 ? 255 : oA + iA;
430        }
431        break;
432    case BLEND_SUBTRACT:
433    #if defined(ARCH_X86_HAVE_SSSE3)
434        if (gArchUseSIMD) {
435            if((x1 + 8) < x2) {
436                uint32_t len = (x2 - x1) >> 3;
437                rsdIntrinsicBlendSub_K(out, in, len);
438                x1 += len << 3;
439                out += len << 3;
440                in += len << 3;
441            }
442        }
443    #endif
444        for (;x1 < x2; x1++, out++, in++) {
445            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
446                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
447            out->x = (oR - iR) < 0 ? 0 : oR - iR;
448            out->y = (oG - iG) < 0 ? 0 : oG - iG;
449            out->z = (oB - iB) < 0 ? 0 : oB - iB;
450            out->w = (oA - iA) < 0 ? 0 : oA - iA;
451        }
452        break;
453    case BLEND_STAMP:
454        ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
455        rsAssert(false);
456        break;
457    case BLEND_RED:
458        ALOGE("Called unimplemented blend intrinsic BLEND_RED");
459        rsAssert(false);
460        break;
461    case BLEND_GREEN:
462        ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
463        rsAssert(false);
464        break;
465    case BLEND_BLUE:
466        ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
467        rsAssert(false);
468        break;
469    case BLEND_HUE:
470        ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
471        rsAssert(false);
472        break;
473    case BLEND_SATURATION:
474        ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
475        rsAssert(false);
476        break;
477    case BLEND_COLOR:
478        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
479        rsAssert(false);
480        break;
481    case BLEND_LUMINOSITY:
482        ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
483        rsAssert(false);
484        break;
485
486    default:
487        ALOGE("Called unimplemented value %d", p->slot);
488        rsAssert(false);
489
490    }
491}
492
493
494RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
495                                                       const Script *s, const Element *e)
496            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
497
498    mRootPtr = &kernel;
499}
500
501RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
502}
503
504void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
505    s->mHal.info.exportedVariableCount = 0;
506}
507
508RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
509                                      const Script *s, const Element *e) {
510    return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
511}
512
513
514
515