1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18#include "rsCpuIntrinsic.h"
19#include "rsCpuIntrinsicInlines.h"
20
21using namespace android;
22using namespace android::renderscript;
23
24namespace android {
25namespace renderscript {
26
27
28class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
29public:
30    virtual void populateScript(Script *);
31
32    virtual ~RsdCpuScriptIntrinsicBlend();
33    RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34
35protected:
36    static void kernel(const RsForEachStubParamStruct *p,
37                          uint32_t xstart, uint32_t xend,
38                          uint32_t instep, uint32_t outstep);
39};
40
41}
42}
43
44
// Blend mode identifiers; RsdCpuScriptIntrinsicBlend::kernel switches on these
// via p->slot. The numeric values are spelled out explicitly.
// NOTE(review): presumably they mirror slot numbers used by callers outside
// this file -- confirm before renumbering.
enum {
    // Compositing-operator style modes (all handled by the kernel).
    BLEND_CLEAR               = 0,
    BLEND_SRC                 = 1,
    BLEND_DST                 = 2,
    BLEND_SRC_OVER            = 3,
    BLEND_DST_OVER            = 4,
    BLEND_SRC_IN              = 5,
    BLEND_DST_IN              = 6,
    BLEND_SRC_OUT             = 7,
    BLEND_DST_OUT             = 8,
    BLEND_SRC_ATOP            = 9,
    BLEND_DST_ATOP            = 10,
    BLEND_XOR                 = 11,

    // Extended modes. Of these, the kernel only implements BLEND_MULTIPLY,
    // BLEND_ADD and BLEND_SUBTRACT; the others log an error and assert.
    BLEND_NORMAL              = 12,
    BLEND_AVERAGE             = 13,
    BLEND_MULTIPLY            = 14,
    BLEND_SCREEN              = 15,
    BLEND_DARKEN              = 16,
    BLEND_LIGHTEN             = 17,
    BLEND_OVERLAY             = 18,
    BLEND_HARDLIGHT           = 19,
    BLEND_SOFTLIGHT           = 20,
    BLEND_DIFFERENCE          = 21,
    BLEND_NEGATION            = 22,
    BLEND_EXCLUSION           = 23,
    BLEND_COLOR_DODGE         = 24,
    BLEND_INVERSE_COLOR_DODGE = 25,
    BLEND_SOFT_DODGE          = 26,
    BLEND_COLOR_BURN          = 27,
    BLEND_INVERSE_COLOR_BURN  = 28,
    BLEND_SOFT_BURN           = 29,
    BLEND_REFLECT             = 30,
    BLEND_GLOW                = 31,
    BLEND_FREEZE              = 32,
    BLEND_HEAT                = 33,
    BLEND_ADD                 = 34,
    BLEND_SUBTRACT            = 35,
    BLEND_STAMP               = 36,
    BLEND_RED                 = 37,
    BLEND_GREEN               = 38,
    BLEND_BLUE                = 39,
    BLEND_HUE                 = 40,
    BLEND_SATURATION          = 41,
    BLEND_COLOR               = 42,
    BLEND_LUMINOSITY          = 43
};
92
// Externally defined blend routines with C linkage. Each takes the destination
// and source pixel pointers plus a count; the kernel in this file passes the
// number of whole 8-pixel groups as count8.
extern "C" {
void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
}  // extern "C"
105
106void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
107                                        uint32_t xstart, uint32_t xend,
108                                        uint32_t instep, uint32_t outstep) {
109    RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
110
111    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
112    uchar4 *out = (uchar4 *)p->out;
113    uchar4 *in = (uchar4 *)p->in;
114    uint32_t x1 = xstart;
115    uint32_t x2 = xend;
116
117    switch (p->slot) {
118    case BLEND_CLEAR:
119        for (;x1 < x2; x1++, out++) {
120            *out = 0;
121        }
122        break;
123    case BLEND_SRC:
124        for (;x1 < x2; x1++, out++, in++) {
125          *out = *in;
126        }
127        break;
128    //BLEND_DST is a NOP
129    case BLEND_DST:
130        break;
131    case BLEND_SRC_OVER:
132#if defined(ARCH_ARM_HAVE_VFP)
133        if (gArchUseSIMD) {
134            if((x1 + 8) < x2) {
135                uint32_t len = (x2 - x1) >> 3;
136                rsdIntrinsicBlendSrcOver_K(out, in, len);
137                x1 += len << 3;
138                out += len << 3;
139                in += len << 3;
140            }
141        }
142#endif
143        for (;x1 < x2; x1++, out++, in++) {
144            short4 in_s = convert_short4(*in);
145            short4 out_s = convert_short4(*out);
146            in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
147            *out = convert_uchar4(in_s);
148        }
149        break;
150    case BLEND_DST_OVER:
151#if defined(ARCH_ARM_HAVE_VFP)
152        if (gArchUseSIMD) {
153            if((x1 + 8) < x2) {
154                uint32_t len = (x2 - x1) >> 3;
155                rsdIntrinsicBlendDstOver_K(out, in, len);
156                x1 += len << 3;
157                out += len << 3;
158                in += len << 3;
159            }
160        }
161#endif
162        for (;x1 < x2; x1++, out++, in++) {
163            short4 in_s = convert_short4(*in);
164            short4 out_s = convert_short4(*out);
165            in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
166            *out = convert_uchar4(in_s);
167        }
168        break;
169    case BLEND_SRC_IN:
170#if defined(ARCH_ARM_HAVE_VFP)
171        if (gArchUseSIMD) {
172            if((x1 + 8) < x2) {
173                uint32_t len = (x2 - x1) >> 3;
174                rsdIntrinsicBlendSrcIn_K(out, in, len);
175                x1 += len << 3;
176                out += len << 3;
177                in += len << 3;
178            }
179        }
180#endif
181        for (;x1 < x2; x1++, out++, in++) {
182            short4 in_s = convert_short4(*in);
183            in_s = (in_s * out->w) >> (short4)8;
184            *out = convert_uchar4(in_s);
185        }
186        break;
187    case BLEND_DST_IN:
188#if defined(ARCH_ARM_HAVE_VFP)
189        if (gArchUseSIMD) {
190            if((x1 + 8) < x2) {
191                uint32_t len = (x2 - x1) >> 3;
192                rsdIntrinsicBlendDstIn_K(out, in, len);
193                x1 += len << 3;
194                out += len << 3;
195                in += len << 3;
196            }
197        }
198#endif
199        for (;x1 < x2; x1++, out++, in++) {
200            short4 out_s = convert_short4(*out);
201            out_s = (out_s * in->w) >> (short4)8;
202            *out = convert_uchar4(out_s);
203        }
204        break;
205    case BLEND_SRC_OUT:
206#if defined(ARCH_ARM_HAVE_VFP)
207        if (gArchUseSIMD) {
208            if((x1 + 8) < x2) {
209                uint32_t len = (x2 - x1) >> 3;
210                rsdIntrinsicBlendSrcOut_K(out, in, len);
211                x1 += len << 3;
212                out += len << 3;
213                in += len << 3;
214            }
215        }
216#endif
217        for (;x1 < x2; x1++, out++, in++) {
218            short4 in_s = convert_short4(*in);
219            in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
220            *out = convert_uchar4(in_s);
221        }
222        break;
223    case BLEND_DST_OUT:
224#if defined(ARCH_ARM_HAVE_VFP)
225        if (gArchUseSIMD) {
226            if((x1 + 8) < x2) {
227                uint32_t len = (x2 - x1) >> 3;
228                rsdIntrinsicBlendDstOut_K(out, in, len);
229                x1 += len << 3;
230                out += len << 3;
231                in += len << 3;
232            }
233        }
234#endif
235        for (;x1 < x2; x1++, out++, in++) {
236            short4 out_s = convert_short4(*out);
237            out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
238            *out = convert_uchar4(out_s);
239        }
240        break;
241    case BLEND_SRC_ATOP:
242#if defined(ARCH_ARM_HAVE_VFP)
243        if (gArchUseSIMD) {
244            if((x1 + 8) < x2) {
245                uint32_t len = (x2 - x1) >> 3;
246                rsdIntrinsicBlendSrcAtop_K(out, in, len);
247                x1 += len << 3;
248                out += len << 3;
249                in += len << 3;
250            }
251        }
252#endif
253        for (;x1 < x2; x1++, out++, in++) {
254            short4 in_s = convert_short4(*in);
255            short4 out_s = convert_short4(*out);
256            out_s.xyz = ((in_s.xyz * out_s.w) +
257              (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
258            *out = convert_uchar4(out_s);
259        }
260        break;
261    case BLEND_DST_ATOP:
262#if defined(ARCH_ARM_HAVE_VFP)
263        if (gArchUseSIMD) {
264            if((x1 + 8) < x2) {
265                uint32_t len = (x2 - x1) >> 3;
266                rsdIntrinsicBlendDstAtop_K(out, in, len);
267                x1 += len << 3;
268                out += len << 3;
269                in += len << 3;
270            }
271        }
272#endif
273        for (;x1 < x2; x1++, out++, in++) {
274            short4 in_s = convert_short4(*in);
275            short4 out_s = convert_short4(*out);
276            out_s.xyz = ((out_s.xyz * in_s.w) +
277              (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
278            *out = convert_uchar4(out_s);
279        }
280        break;
281    case BLEND_XOR:
282#if defined(ARCH_ARM_HAVE_VFP)
283        if (gArchUseSIMD) {
284            if((x1 + 8) < x2) {
285                uint32_t len = (x2 - x1) >> 3;
286                rsdIntrinsicBlendXor_K(out, in, len);
287                x1 += len << 3;
288                out += len << 3;
289                in += len << 3;
290            }
291        }
292#endif
293        for (;x1 < x2; x1++, out++, in++) {
294            *out = *in ^ *out;
295        }
296        break;
297    case BLEND_NORMAL:
298        ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
299        rsAssert(false);
300        break;
301    case BLEND_AVERAGE:
302        ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
303        rsAssert(false);
304        break;
305    case BLEND_MULTIPLY:
306#if defined(ARCH_ARM_HAVE_VFP)
307        if (gArchUseSIMD) {
308            if((x1 + 8) < x2) {
309                uint32_t len = (x2 - x1) >> 3;
310                rsdIntrinsicBlendMultiply_K(out, in, len);
311                x1 += len << 3;
312                out += len << 3;
313                in += len << 3;
314            }
315        }
316#endif
317        for (;x1 < x2; x1++, out++, in++) {
318          *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
319                                >> (short4)8);
320        }
321        break;
322    case BLEND_SCREEN:
323        ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
324        rsAssert(false);
325        break;
326    case BLEND_DARKEN:
327        ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
328        rsAssert(false);
329        break;
330    case BLEND_LIGHTEN:
331        ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
332        rsAssert(false);
333        break;
334    case BLEND_OVERLAY:
335        ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
336        rsAssert(false);
337        break;
338    case BLEND_HARDLIGHT:
339        ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
340        rsAssert(false);
341        break;
342    case BLEND_SOFTLIGHT:
343        ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
344        rsAssert(false);
345        break;
346    case BLEND_DIFFERENCE:
347        ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
348        rsAssert(false);
349        break;
350    case BLEND_NEGATION:
351        ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
352        rsAssert(false);
353        break;
354    case BLEND_EXCLUSION:
355        ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
356        rsAssert(false);
357        break;
358    case BLEND_COLOR_DODGE:
359        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
360        rsAssert(false);
361        break;
362    case BLEND_INVERSE_COLOR_DODGE:
363        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
364        rsAssert(false);
365        break;
366    case BLEND_SOFT_DODGE:
367        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
368        rsAssert(false);
369        break;
370    case BLEND_COLOR_BURN:
371        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
372        rsAssert(false);
373        break;
374    case BLEND_INVERSE_COLOR_BURN:
375        ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
376        rsAssert(false);
377        break;
378    case BLEND_SOFT_BURN:
379        ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
380        rsAssert(false);
381        break;
382    case BLEND_REFLECT:
383        ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
384        rsAssert(false);
385        break;
386    case BLEND_GLOW:
387        ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
388        rsAssert(false);
389        break;
390    case BLEND_FREEZE:
391        ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
392        rsAssert(false);
393        break;
394    case BLEND_HEAT:
395        ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
396        rsAssert(false);
397        break;
398    case BLEND_ADD:
399#if defined(ARCH_ARM_HAVE_VFP)
400        if (gArchUseSIMD) {
401            if((x1 + 8) < x2) {
402                uint32_t len = (x2 - x1) >> 3;
403                rsdIntrinsicBlendAdd_K(out, in, len);
404                x1 += len << 3;
405                out += len << 3;
406                in += len << 3;
407            }
408        }
409#endif
410        for (;x1 < x2; x1++, out++, in++) {
411            uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
412                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
413            out->x = (oR + iR) > 255 ? 255 : oR + iR;
414            out->y = (oG + iG) > 255 ? 255 : oG + iG;
415            out->z = (oB + iB) > 255 ? 255 : oB + iB;
416            out->w = (oA + iA) > 255 ? 255 : oA + iA;
417        }
418        break;
419    case BLEND_SUBTRACT:
420#if defined(ARCH_ARM_HAVE_VFP)
421        if (gArchUseSIMD) {
422            if((x1 + 8) < x2) {
423                uint32_t len = (x2 - x1) >> 3;
424                rsdIntrinsicBlendSub_K(out, in, len);
425                x1 += len << 3;
426                out += len << 3;
427                in += len << 3;
428            }
429        }
430#endif
431        for (;x1 < x2; x1++, out++, in++) {
432            int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
433                oR = out->x, oG = out->y, oB = out->z, oA = out->w;
434            out->x = (oR - iR) < 0 ? 0 : oR - iR;
435            out->y = (oG - iG) < 0 ? 0 : oG - iG;
436            out->z = (oB - iB) < 0 ? 0 : oB - iB;
437            out->w = (oA - iA) < 0 ? 0 : oA - iA;
438        }
439        break;
440    case BLEND_STAMP:
441        ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
442        rsAssert(false);
443        break;
444    case BLEND_RED:
445        ALOGE("Called unimplemented blend intrinsic BLEND_RED");
446        rsAssert(false);
447        break;
448    case BLEND_GREEN:
449        ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
450        rsAssert(false);
451        break;
452    case BLEND_BLUE:
453        ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
454        rsAssert(false);
455        break;
456    case BLEND_HUE:
457        ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
458        rsAssert(false);
459        break;
460    case BLEND_SATURATION:
461        ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
462        rsAssert(false);
463        break;
464    case BLEND_COLOR:
465        ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
466        rsAssert(false);
467        break;
468    case BLEND_LUMINOSITY:
469        ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
470        rsAssert(false);
471        break;
472
473    default:
474        ALOGE("Called unimplemented value %d", p->slot);
475        rsAssert(false);
476
477    }
478}
479
480
481RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
482                                                       const Script *s, const Element *e)
483            : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
484
485    mRootPtr = &kernel;
486}
487
488RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
489}
490
491void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
492    s->mHal.info.exportedVariableCount = 0;
493}
494
495RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
496                                      const Script *s, const Element *e) {
497    return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
498}
499
500
501
502