1/*
2 * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
3 *             2005 Lars Knoll & Zack Rusin, Trolltech
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Keith Packard not be used in
10 * advertising or publicity pertaining to distribution of the software without
11 * specific, written prior permission.  Keith Packard makes no
12 * representations about the suitability of this software for any purpose.  It
13 * is provided "as is" without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 */
24#ifdef HAVE_CONFIG_H
25#include <config.h>
26#endif
27
28#include <math.h>
29#include <string.h>
30
31#include "pixman-private.h"
32#include "pixman-combine32.h"
33
34/* component alpha helper functions */
35
36static void
37combine_mask_ca (uint32_t *src, uint32_t *mask)
38{
39    uint32_t a = *mask;
40
41    uint32_t x;
42    uint16_t xa;
43
44    if (!a)
45    {
46	*(src) = 0;
47	return;
48    }
49
50    x = *(src);
51    if (a == ~0)
52    {
53	x = x >> A_SHIFT;
54	x |= x << G_SHIFT;
55	x |= x << R_SHIFT;
56	*(mask) = x;
57	return;
58    }
59
60    xa = x >> A_SHIFT;
61    UN8x4_MUL_UN8x4 (x, a);
62    *(src) = x;
63
64    UN8x4_MUL_UN8 (a, xa);
65    *(mask) = a;
66}
67
/*
 * Multiply a source pixel by a component-alpha mask, in place.  Only *src is
 * written; the mask is read-only.  A zero mask clears the source and an
 * all-ones mask leaves it unchanged, both without doing the multiplication.
 */
static void
combine_mask_value_ca (uint32_t *src, const uint32_t *mask)
{
    uint32_t m = *mask;

    if (m == 0)
    {
        *src = 0;
    }
    else if (m != ~0)
    {
        uint32_t s = *src;

        UN8x4_MUL_UN8x4 (s, m);
        *src = s;
    }
}
87
88static void
89combine_mask_alpha_ca (const uint32_t *src, uint32_t *mask)
90{
91    uint32_t a = *(mask);
92    uint32_t x;
93
94    if (!a)
95	return;
96
97    x = *(src) >> A_SHIFT;
98    if (x == MASK)
99	return;
100
101    if (a == ~0)
102    {
103	x |= x << G_SHIFT;
104	x |= x << R_SHIFT;
105	*(mask) = x;
106	return;
107    }
108
109    UN8x4_MUL_UN8 (a, x);
110    *(mask) = a;
111}
112
113/*
114 * There are two ways of handling alpha -- either as a single unified value or
115 * a separate value for each component, hence each macro must have two
116 * versions.  The unified alpha version has a 'u' at the end of the name,
117 * the component version has a 'ca'.  Similarly, functions which deal with
118 * this difference will have two versions using the same convention.
119 */
120
121static force_inline uint32_t
122combine_mask (const uint32_t *src, const uint32_t *mask, int i)
123{
124    uint32_t s, m;
125
126    if (mask)
127    {
128	m = *(mask + i) >> A_SHIFT;
129
130	if (!m)
131	    return 0;
132    }
133
134    s = *(src + i);
135
136    if (mask)
137	UN8x4_MUL_UN8 (s, m);
138
139    return s;
140}
141
142static void
143combine_clear (pixman_implementation_t *imp,
144               pixman_op_t              op,
145               uint32_t *                dest,
146               const uint32_t *          src,
147               const uint32_t *          mask,
148               int                      width)
149{
150    memset (dest, 0, width * sizeof(uint32_t));
151}
152
153static void
154combine_dst (pixman_implementation_t *imp,
155	     pixman_op_t	      op,
156	     uint32_t *		      dest,
157	     const uint32_t *	      src,
158	     const uint32_t *          mask,
159	     int		      width)
160{
161    return;
162}
163
164static void
165combine_src_u (pixman_implementation_t *imp,
166               pixman_op_t              op,
167               uint32_t *                dest,
168               const uint32_t *          src,
169               const uint32_t *          mask,
170               int                      width)
171{
172    int i;
173
174    if (!mask)
175    {
176	memcpy (dest, src, width * sizeof (uint32_t));
177    }
178    else
179    {
180	for (i = 0; i < width; ++i)
181	{
182	    uint32_t s = combine_mask (src, mask, i);
183
184	    *(dest + i) = s;
185	}
186    }
187}
188
189static void
190combine_over_u (pixman_implementation_t *imp,
191                pixman_op_t              op,
192                uint32_t *                dest,
193                const uint32_t *          src,
194                const uint32_t *          mask,
195                int                      width)
196{
197    int i;
198
199    if (!mask)
200    {
201	for (i = 0; i < width; ++i)
202	{
203	    uint32_t s = *(src + i);
204	    uint32_t a = ALPHA_8 (s);
205	    if (a == 0xFF)
206	    {
207		*(dest + i) = s;
208	    }
209	    else if (s)
210	    {
211		uint32_t d = *(dest + i);
212		uint32_t ia = a ^ 0xFF;
213		UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
214		*(dest + i) = d;
215	    }
216	}
217    }
218    else
219    {
220	for (i = 0; i < width; ++i)
221	{
222	    uint32_t m = ALPHA_8 (*(mask + i));
223	    if (m == 0xFF)
224	    {
225		uint32_t s = *(src + i);
226		uint32_t a = ALPHA_8 (s);
227		if (a == 0xFF)
228		{
229		    *(dest + i) = s;
230		}
231		else if (s)
232		{
233		    uint32_t d = *(dest + i);
234		    uint32_t ia = a ^ 0xFF;
235		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
236		    *(dest + i) = d;
237		}
238	    }
239	    else if (m)
240	    {
241		uint32_t s = *(src + i);
242		if (s)
243		{
244		    uint32_t d = *(dest + i);
245		    UN8x4_MUL_UN8 (s, m);
246		    UN8x4_MUL_UN8_ADD_UN8x4 (d, ALPHA_8 (~s), s);
247		    *(dest + i) = d;
248		}
249	    }
250	}
251    }
252}
253
254static void
255combine_over_reverse_u (pixman_implementation_t *imp,
256                        pixman_op_t              op,
257                        uint32_t *                dest,
258                        const uint32_t *          src,
259                        const uint32_t *          mask,
260                        int                      width)
261{
262    int i;
263
264    for (i = 0; i < width; ++i)
265    {
266	uint32_t s = combine_mask (src, mask, i);
267	uint32_t d = *(dest + i);
268	uint32_t ia = ALPHA_8 (~*(dest + i));
269	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
270	*(dest + i) = s;
271    }
272}
273
274static void
275combine_in_u (pixman_implementation_t *imp,
276              pixman_op_t              op,
277              uint32_t *                dest,
278              const uint32_t *          src,
279              const uint32_t *          mask,
280              int                      width)
281{
282    int i;
283
284    for (i = 0; i < width; ++i)
285    {
286	uint32_t s = combine_mask (src, mask, i);
287	uint32_t a = ALPHA_8 (*(dest + i));
288	UN8x4_MUL_UN8 (s, a);
289	*(dest + i) = s;
290    }
291}
292
293static void
294combine_in_reverse_u (pixman_implementation_t *imp,
295                      pixman_op_t              op,
296                      uint32_t *                dest,
297                      const uint32_t *          src,
298                      const uint32_t *          mask,
299                      int                      width)
300{
301    int i;
302
303    for (i = 0; i < width; ++i)
304    {
305	uint32_t s = combine_mask (src, mask, i);
306	uint32_t d = *(dest + i);
307	uint32_t a = ALPHA_8 (s);
308	UN8x4_MUL_UN8 (d, a);
309	*(dest + i) = d;
310    }
311}
312
313static void
314combine_out_u (pixman_implementation_t *imp,
315               pixman_op_t              op,
316               uint32_t *                dest,
317               const uint32_t *          src,
318               const uint32_t *          mask,
319               int                      width)
320{
321    int i;
322
323    for (i = 0; i < width; ++i)
324    {
325	uint32_t s = combine_mask (src, mask, i);
326	uint32_t a = ALPHA_8 (~*(dest + i));
327	UN8x4_MUL_UN8 (s, a);
328	*(dest + i) = s;
329    }
330}
331
332static void
333combine_out_reverse_u (pixman_implementation_t *imp,
334                       pixman_op_t              op,
335                       uint32_t *                dest,
336                       const uint32_t *          src,
337                       const uint32_t *          mask,
338                       int                      width)
339{
340    int i;
341
342    for (i = 0; i < width; ++i)
343    {
344	uint32_t s = combine_mask (src, mask, i);
345	uint32_t d = *(dest + i);
346	uint32_t a = ALPHA_8 (~s);
347	UN8x4_MUL_UN8 (d, a);
348	*(dest + i) = d;
349    }
350}
351
352static void
353combine_atop_u (pixman_implementation_t *imp,
354                pixman_op_t              op,
355                uint32_t *                dest,
356                const uint32_t *          src,
357                const uint32_t *          mask,
358                int                      width)
359{
360    int i;
361
362    for (i = 0; i < width; ++i)
363    {
364	uint32_t s = combine_mask (src, mask, i);
365	uint32_t d = *(dest + i);
366	uint32_t dest_a = ALPHA_8 (d);
367	uint32_t src_ia = ALPHA_8 (~s);
368
369	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
370	*(dest + i) = s;
371    }
372}
373
374static void
375combine_atop_reverse_u (pixman_implementation_t *imp,
376                        pixman_op_t              op,
377                        uint32_t *                dest,
378                        const uint32_t *          src,
379                        const uint32_t *          mask,
380                        int                      width)
381{
382    int i;
383
384    for (i = 0; i < width; ++i)
385    {
386	uint32_t s = combine_mask (src, mask, i);
387	uint32_t d = *(dest + i);
388	uint32_t src_a = ALPHA_8 (s);
389	uint32_t dest_ia = ALPHA_8 (~d);
390
391	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
392	*(dest + i) = s;
393    }
394}
395
396static void
397combine_xor_u (pixman_implementation_t *imp,
398               pixman_op_t              op,
399               uint32_t *                dest,
400               const uint32_t *          src,
401               const uint32_t *          mask,
402               int                      width)
403{
404    int i;
405
406    for (i = 0; i < width; ++i)
407    {
408	uint32_t s = combine_mask (src, mask, i);
409	uint32_t d = *(dest + i);
410	uint32_t src_ia = ALPHA_8 (~s);
411	uint32_t dest_ia = ALPHA_8 (~d);
412
413	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
414	*(dest + i) = s;
415    }
416}
417
418static void
419combine_add_u (pixman_implementation_t *imp,
420               pixman_op_t              op,
421               uint32_t *                dest,
422               const uint32_t *          src,
423               const uint32_t *          mask,
424               int                      width)
425{
426    int i;
427
428    for (i = 0; i < width; ++i)
429    {
430	uint32_t s = combine_mask (src, mask, i);
431	uint32_t d = *(dest + i);
432	UN8x4_ADD_UN8x4 (d, s);
433	*(dest + i) = d;
434    }
435}
436
437static void
438combine_saturate_u (pixman_implementation_t *imp,
439                    pixman_op_t              op,
440                    uint32_t *                dest,
441                    const uint32_t *          src,
442                    const uint32_t *          mask,
443                    int                      width)
444{
445    int i;
446
447    for (i = 0; i < width; ++i)
448    {
449	uint32_t s = combine_mask (src, mask, i);
450	uint32_t d = *(dest + i);
451	uint16_t sa, da;
452
453	sa = s >> A_SHIFT;
454	da = ~d >> A_SHIFT;
455	if (sa > da)
456	{
457	    sa = DIV_UN8 (da, sa);
458	    UN8x4_MUL_UN8 (s, sa);
459	}
460	;
461	UN8x4_ADD_UN8x4 (d, s);
462	*(dest + i) = d;
463    }
464}
465
466/*
467 * PDF blend modes:
468 * The following blend modes have been taken from the PDF ISO 32000
469 * specification, which at this point in time is available from
470 * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
471 * The relevant chapters are 11.3.5 and 11.3.6.
472 * The formula for computing the final pixel color given in 11.3.6 is:
473 * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
474 * with B() being the blend function.
475 * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
476 *
477 * These blend modes should match the SVG filter draft specification, as
478 * it has been designed to mirror ISO 32000. Note that at the current point
479 * no released draft exists that shows this, as the formulas have not been
480 * updated yet after the release of ISO 32000.
481 *
482 * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
483 * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
484 * argument. Note that this implementation operates on premultiplied colors,
485 * while the PDF specification does not. Therefore the code uses the formula
486 * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
487 */
488
489/*
490 * Multiply
491 * B(Dca, ad, Sca, as) = Dca.Sca
492 */
493static void
494combine_multiply_u (pixman_implementation_t *imp,
495                    pixman_op_t              op,
496                    uint32_t *                dest,
497                    const uint32_t *          src,
498                    const uint32_t *          mask,
499                    int                      width)
500{
501    int i;
502
503    for (i = 0; i < width; ++i)
504    {
505	uint32_t s = combine_mask (src, mask, i);
506	uint32_t d = *(dest + i);
507	uint32_t ss = s;
508	uint32_t src_ia = ALPHA_8 (~s);
509	uint32_t dest_ia = ALPHA_8 (~d);
510
511	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (ss, dest_ia, d, src_ia);
512	UN8x4_MUL_UN8x4 (d, s);
513	UN8x4_ADD_UN8x4 (d, ss);
514
515	*(dest + i) = d;
516    }
517}
518
519static void
520combine_multiply_ca (pixman_implementation_t *imp,
521                     pixman_op_t              op,
522                     uint32_t *                dest,
523                     const uint32_t *          src,
524                     const uint32_t *          mask,
525                     int                      width)
526{
527    int i;
528
529    for (i = 0; i < width; ++i)
530    {
531	uint32_t m = *(mask + i);
532	uint32_t s = *(src + i);
533	uint32_t d = *(dest + i);
534	uint32_t r = d;
535	uint32_t dest_ia = ALPHA_8 (~d);
536
537	combine_mask_ca (&s, &m);
538
539	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (r, ~m, s, dest_ia);
540	UN8x4_MUL_UN8x4 (d, s);
541	UN8x4_ADD_UN8x4 (r, d);
542
543	*(dest + i) = r;
544    }
545}
546
/*
 * Generate combine_<name>_u and combine_<name>_ca from a per-channel blend
 * function blend_<name> (dca, da, sca, sa), which must already be defined.
 *
 * Both generated functions first compute the part of the formula that does
 * not depend on the blend function, then add the blended alpha and the
 * three blended colour channels, each shifted into its position.  The _ca
 * variant runs source and mask through combine_mask_ca() and passes the
 * per-channel mask value where the _u variant passes the source alpha.
 */
#define PDF_SEPARABLE_BLEND_MODE(name)                                  \
    static void                                                         \
    combine_ ## name ## _u (pixman_implementation_t *imp,               \
                            pixman_op_t              op,                \
                            uint32_t                *dest,              \
                            const uint32_t          *src,               \
                            const uint32_t          *mask,              \
                            int                      width)             \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < width; ++i)                                     \
        {                                                               \
            uint32_t s = combine_mask (src, mask, i);                   \
            uint32_t d = dest[i];                                       \
            uint8_t sa = ALPHA_8 (s);                                   \
            uint8_t da = ALPHA_8 (d);                                   \
            uint8_t isa = ~sa;                                          \
            uint8_t ida = ~da;                                          \
            uint32_t ra, rr, rg, rb;                                    \
            uint32_t result = d;                                        \
                                                                        \
            UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);      \
                                                                        \
            ra = DIV_ONE_UN8 (sa * (uint32_t)da);                       \
            rr = blend_ ## name (RED_8 (d), da, RED_8 (s), sa);         \
            rg = blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), sa);     \
            rb = blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), sa);       \
                                                                        \
            dest[i] = result +                                          \
                (ra << A_SHIFT) +                                       \
                (rr << R_SHIFT) +                                       \
                (rg << G_SHIFT) +                                       \
                rb;                                                     \
        }                                                               \
    }                                                                   \
                                                                        \
    static void                                                         \
    combine_ ## name ## _ca (pixman_implementation_t *imp,              \
                             pixman_op_t              op,               \
                             uint32_t                *dest,             \
                             const uint32_t          *src,              \
                             const uint32_t          *mask,             \
                             int                      width)            \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < width; ++i)                                     \
        {                                                               \
            uint32_t m = mask[i];                                       \
            uint32_t s = src[i];                                        \
            uint32_t d = dest[i];                                       \
            uint8_t da = ALPHA_8 (d);                                   \
            uint8_t ida = ~da;                                          \
            uint32_t ra, rr, rg, rb;                                    \
            uint32_t result = d;                                        \
                                                                        \
            combine_mask_ca (&s, &m);                                   \
                                                                        \
            UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (result, ~m, s, ida);     \
                                                                        \
            ra = DIV_ONE_UN8 (ALPHA_8 (m) * (uint32_t)da);              \
            rr = blend_ ## name (RED_8 (d), da, RED_8 (s), RED_8 (m));  \
            rg = blend_ ## name (GREEN_8 (d), da, GREEN_8 (s), GREEN_8 (m)); \
            rb = blend_ ## name (BLUE_8 (d), da, BLUE_8 (s), BLUE_8 (m)); \
                                                                        \
            result +=                                                   \
                (ra << A_SHIFT) +                                       \
                (rr << R_SHIFT) +                                       \
                (rg << G_SHIFT) +                                       \
                rb;                                                     \
                                                                        \
            dest[i] = result;                                           \
        }                                                               \
    }
608
609/*
610 * Screen
611 * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
612 */
613static inline uint32_t
614blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
615{
616    return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca);
617}
618
619PDF_SEPARABLE_BLEND_MODE (screen)
620
621/*
622 * Overlay
623 * B(Dca, Da, Sca, Sa) =
624 *   if 2.Dca < Da
625 *     2.Sca.Dca
626 *   otherwise
627 *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
628 */
629static inline uint32_t
630blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
631{
632    uint32_t rca;
633
634    if (2 * dca < da)
635	rca = 2 * sca * dca;
636    else
637	rca = sa * da - 2 * (da - dca) * (sa - sca);
638    return DIV_ONE_UN8 (rca);
639}
640
641PDF_SEPARABLE_BLEND_MODE (overlay)
642
643/*
644 * Darken
645 * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
646 */
647static inline uint32_t
648blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
649{
650    uint32_t s, d;
651
652    s = sca * da;
653    d = dca * sa;
654    return DIV_ONE_UN8 (s > d ? d : s);
655}
656
657PDF_SEPARABLE_BLEND_MODE (darken)
658
659/*
660 * Lighten
661 * B(Dca, Da, Sca, Sa) = max (Sca.Da, Dca.Sa)
662 */
663static inline uint32_t
664blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
665{
666    uint32_t s, d;
667
668    s = sca * da;
669    d = dca * sa;
670    return DIV_ONE_UN8 (s > d ? s : d);
671}
672
673PDF_SEPARABLE_BLEND_MODE (lighten)
674
675/*
676 * Color dodge
677 * B(Dca, Da, Sca, Sa) =
678 *   if Dca == 0
679 *     0
680 *   if Sca == Sa
681 *     Sa.Da
682 *   otherwise
683 *     Sa.Da. min (1, Dca / Da / (1 - Sca/Sa))
684 */
685static inline uint32_t
686blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
687{
688    if (sca >= sa)
689    {
690	return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da);
691    }
692    else
693    {
694	uint32_t rca = dca * sa / (sa - sca);
695	return DIV_ONE_UN8 (sa * MIN (rca, da));
696    }
697}
698
699PDF_SEPARABLE_BLEND_MODE (color_dodge)
700
701/*
702 * Color burn
703 * B(Dca, Da, Sca, Sa) =
704 *   if Dca == Da
705 *     Sa.Da
706 *   if Sca == 0
707 *     0
708 *   otherwise
709 *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
710 */
711static inline uint32_t
712blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
713{
714    if (sca == 0)
715    {
716	return dca < da ? 0 : DIV_ONE_UN8 (sa * da);
717    }
718    else
719    {
720	uint32_t rca = (da - dca) * sa / sca;
721	return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca));
722    }
723}
724
725PDF_SEPARABLE_BLEND_MODE (color_burn)
726
727/*
728 * Hard light
729 * B(Dca, Da, Sca, Sa) =
730 *   if 2.Sca < Sa
731 *     2.Sca.Dca
732 *   otherwise
733 *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
734 */
735static inline uint32_t
736blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
737{
738    if (2 * sca < sa)
739	return DIV_ONE_UN8 (2 * sca * dca);
740    else
741	return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
742}
743
744PDF_SEPARABLE_BLEND_MODE (hard_light)
745
746/*
747 * Soft light
748 * B(Dca, Da, Sca, Sa) =
749 *   if (2.Sca <= Sa)
750 *     Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
751 *   otherwise if Dca.4 <= Da
752 *     Dca.(Sa + (2.Sca - Sa).((16.Dca/Da - 12).Dca/Da + 3)
753 *   otherwise
754 *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
755 */
756static inline uint32_t
757blend_soft_light (uint32_t dca_org,
758		  uint32_t da_org,
759		  uint32_t sca_org,
760		  uint32_t sa_org)
761{
762    double dca = dca_org * (1.0 / MASK);
763    double da = da_org * (1.0 / MASK);
764    double sca = sca_org * (1.0 / MASK);
765    double sa = sa_org * (1.0 / MASK);
766    double rca;
767
768    if (2 * sca < sa)
769    {
770	if (da == 0)
771	    rca = dca * sa;
772	else
773	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
774    }
775    else if (da == 0)
776    {
777	rca = 0;
778    }
779    else if (4 * dca <= da)
780    {
781	rca = dca * sa +
782	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
783    }
784    else
785    {
786	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
787    }
788    return rca * MASK + 0.5;
789}
790
791PDF_SEPARABLE_BLEND_MODE (soft_light)
792
793/*
794 * Difference
795 * B(Dca, Da, Sca, Sa) = abs (Dca.Sa - Sca.Da)
796 */
797static inline uint32_t
798blend_difference (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
799{
800    uint32_t dcasa = dca * sa;
801    uint32_t scada = sca * da;
802
803    if (scada < dcasa)
804	return DIV_ONE_UN8 (dcasa - scada);
805    else
806	return DIV_ONE_UN8 (scada - dcasa);
807}
808
809PDF_SEPARABLE_BLEND_MODE (difference)
810
811/*
812 * Exclusion
813 * B(Dca, Da, Sca, Sa) = (Sca.Da + Dca.Sa - 2.Sca.Dca)
814 */
815
816/* This can be made faster by writing it directly and not using
817 * PDF_SEPARABLE_BLEND_MODE, but that's a performance optimization */
818
819static inline uint32_t
820blend_exclusion (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
821{
822    return DIV_ONE_UN8 (sca * da + dca * sa - 2 * dca * sca);
823}
824
825PDF_SEPARABLE_BLEND_MODE (exclusion)
826
827#undef PDF_SEPARABLE_BLEND_MODE
828
829/*
 * PDF non-separable blend modes are implemented using the following functions
 * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
 * and min value of the red, green and blue components.
833 *
834 * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
835 *
 * clip_color (C):
 *   l = LUM (C)
 *   min = Cmin
 *   max = Cmax
 *   if min < 0.0
 *     C = l + ( ( ( C – l ) × l ) ⁄ ( l – min ) )
 *   if max > 1.0
 *     C = l + ( ( ( C – l ) × ( 1 – l ) ) ⁄ ( max – l ) )
 *   return C
845 *
846 * set_lum (C, l):
847 *   d = l – LUM (C)
848 *   C += d
849 *   return clip_color (C)
850 *
851 * SAT (C) = CH_MAX (C) - CH_MIN (C)
852 *
853 * set_sat (C, s):
854 *  if Cmax > Cmin
855 *    Cmid = ( ( ( Cmid – Cmin ) × s ) ⁄ ( Cmax – Cmin ) )
856 *    Cmax = s
857 *  else
858 *    Cmid = Cmax = 0.0
859 *  Cmin = 0.0
860 *  return C
861 */
862
863/* For premultiplied colors, we need to know what happens when C is
864 * multiplied by a real number. LUM and SAT are linear:
865 *
866 *    LUM (r × C) = r × LUM (C)		SAT (r * C) = r * SAT (C)
867 *
 * If we extend clip_color with an extra argument a and change
 *
 *        if max > 1.0
 *
 * into
 *
 *        if max > a
 *
 * (using a – l in place of 1 – l in the rescaling that follows), then
 * clip_color is also linear:
 *
 *    r * clip_color (C, a) = clip_color (r * C, r * a);
 *
 * for positive r.
881 *
882 * Similarly, we can extend set_lum with an extra argument that is just passed
883 * on to clip_color:
884 *
885 *   r * set_lum ( C, l, a)
886 *
887 *   = r × clip_color ( C + l - LUM (C), a)
888 *
889 *   = clip_color ( r * C + r × l - r * LUM (C), r * a)
890 *
891 *   = set_lum ( r * C, r * l, r * a)
892 *
 * Finally, set_sat:
 *
 *    r * set_sat (C, s) = set_sat (x * C, r * s)
 *
 * The above holds for all non-zero x, because the x'es in the fraction for
 * C_mid cancel out. Specifically, it holds for x = r:
 *
 *    r * set_sat (C, s) = set_sat (r * C, r * s)
901 *
902 */
903
/* So, for the non-separable PDF blend modes, we have (using s, d for
 * non-premultiplied colors, and S, D for premultiplied):
906 *
907 *   Color:
908 *
909 *     a_s * a_d * B(s, d)
910 *   = a_s * a_d * set_lum (S/a_s, LUM (D/a_d), 1)
911 *   = set_lum (S * a_d, a_s * LUM (D), a_s * a_d)
912 *
913 *
914 *   Luminosity:
915 *
916 *     a_s * a_d * B(s, d)
917 *   = a_s * a_d * set_lum (D/a_d, LUM(S/a_s), 1)
918 *   = set_lum (a_s * D, a_d * LUM(S), a_s * a_d)
919 *
920 *
921 *   Saturation:
922 *
923 *     a_s * a_d * B(s, d)
924 *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
925 *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
926 *                                        a_s * LUM (D), a_s * a_d)
 *   = set_lum (set_sat (a_s * D, a_d * SAT (S)), a_s * LUM (D), a_s * a_d)
928 *
929 *   Hue:
930 *
931 *     a_s * a_d * B(s, d)
932 *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
933 *   = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
934 *
935 */
936
/*
 * Helpers operating on a three-element colour array c[0..2]:
 *
 *   CH_MIN (c)  smallest of the three components
 *   CH_MAX (c)  largest of the three components
 *   LUM (c)     weighted sum (30, 59 and 11 percent of c[0], c[1], c[2])
 *   SAT (c)     CH_MAX (c) - CH_MIN (c)
 *
 * The argument is parenthesised so that pointer expressions such as
 * `p + 1' index correctly.  It is still evaluated several times, so it
 * must not have side effects.
 */
#define CH_MIN(c) ((c)[0] < (c)[1] ? ((c)[0] < (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] < (c)[2] ? (c)[1] : (c)[2]))
#define CH_MAX(c) ((c)[0] > (c)[1] ? ((c)[0] > (c)[2] ? (c)[0] : (c)[2]) : ((c)[1] > (c)[2] ? (c)[1] : (c)[2]))
#define LUM(c) (((c)[0] * 30 + (c)[1] * 59 + (c)[2] * 11) / 100)
#define SAT(c) (CH_MAX (c) - CH_MIN (c))
941
/*
 * Generate combine_<name>_u from a whole-colour blend function
 * blend_<name> (c, dc, da, sc, sa), which must already be defined.
 *
 * dc and sc receive the red, green and blue channels (in that order) of the
 * destination and of the masked source.  The blend function fills c[], and
 * each element of c[] is passed through DIV_ONE_UN8 before being shifted
 * into place and added to the part of the formula that does not depend on
 * the blend function.
 */
#define PDF_NON_SEPARABLE_BLEND_MODE(name)                              \
    static void                                                         \
    combine_ ## name ## _u (pixman_implementation_t *imp,               \
                            pixman_op_t              op,                \
                            uint32_t                *dest,              \
                            const uint32_t          *src,               \
                            const uint32_t          *mask,              \
                            int                      width)             \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < width; ++i)                                     \
        {                                                               \
            uint32_t s = combine_mask (src, mask, i);                   \
            uint32_t d = dest[i];                                       \
            uint8_t sa = ALPHA_8 (s);                                   \
            uint8_t da = ALPHA_8 (d);                                   \
            uint8_t isa = ~sa;                                          \
            uint8_t ida = ~da;                                          \
            uint32_t sc[3], dc[3], c[3];                                \
            uint32_t result = d;                                        \
                                                                        \
            UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (result, isa, s, ida);      \
                                                                        \
            dc[0] = RED_8 (d);                                          \
            dc[1] = GREEN_8 (d);                                        \
            dc[2] = BLUE_8 (d);                                         \
            sc[0] = RED_8 (s);                                          \
            sc[1] = GREEN_8 (s);                                        \
            sc[2] = BLUE_8 (s);                                         \
                                                                        \
            blend_ ## name (c, dc, da, sc, sa);                         \
                                                                        \
            dest[i] = result +                                          \
                (DIV_ONE_UN8 (sa * (uint32_t)da) << A_SHIFT) +          \
                (DIV_ONE_UN8 (c[0]) << R_SHIFT) +                       \
                (DIV_ONE_UN8 (c[1]) << G_SHIFT) +                       \
                (DIV_ONE_UN8 (c[2]));                                   \
        }                                                               \
    }
980
981static void
982set_lum (uint32_t dest[3], uint32_t src[3], uint32_t sa, uint32_t lum)
983{
984    double a, l, min, max;
985    double tmp[3];
986
987    a = sa * (1.0 / MASK);
988
989    l = lum * (1.0 / MASK);
990    tmp[0] = src[0] * (1.0 / MASK);
991    tmp[1] = src[1] * (1.0 / MASK);
992    tmp[2] = src[2] * (1.0 / MASK);
993
994    l = l - LUM (tmp);
995    tmp[0] += l;
996    tmp[1] += l;
997    tmp[2] += l;
998
999    /* clip_color */
1000    l = LUM (tmp);
1001    min = CH_MIN (tmp);
1002    max = CH_MAX (tmp);
1003
1004    if (min < 0)
1005    {
1006	if (l - min == 0.0)
1007	{
1008	    tmp[0] = 0;
1009	    tmp[1] = 0;
1010	    tmp[2] = 0;
1011	}
1012	else
1013	{
1014	    tmp[0] = l + (tmp[0] - l) * l / (l - min);
1015	    tmp[1] = l + (tmp[1] - l) * l / (l - min);
1016	    tmp[2] = l + (tmp[2] - l) * l / (l - min);
1017	}
1018    }
1019    if (max > a)
1020    {
1021	if (max - l == 0.0)
1022	{
1023	    tmp[0] = a;
1024	    tmp[1] = a;
1025	    tmp[2] = a;
1026	}
1027	else
1028	{
1029	    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
1030	    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
1031	    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
1032	}
1033    }
1034
1035    dest[0] = tmp[0] * MASK + 0.5;
1036    dest[1] = tmp[1] * MASK + 0.5;
1037    dest[2] = tmp[2] * MASK + 0.5;
1038}
1039
/*
 * set_sat: rescale dest[] so that its largest channel becomes `sat' and its
 * smallest becomes 0, with the middle channel scaled proportionally.
 *
 * Which channel counts as largest, middle and smallest is decided from
 * src[], while the values themselves are read from and written to dest[].
 * If the selected largest value does not exceed the smallest, all three
 * channels are set to 0.
 */
static void
set_sat (uint32_t dest[3], uint32_t src[3], uint32_t sat)
{
    int hi, mid, lo;
    uint32_t min, max;

    /* Rank the channels of src; ties resolve exactly as the strict
     * comparisons dictate.
     */
    if (src[0] > src[1] && src[0] > src[2])
    {
        hi = 0;
        if (src[1] > src[2])
        {
            mid = 1;
            lo = 2;
        }
        else
        {
            mid = 2;
            lo = 1;
        }
    }
    else if (src[0] > src[1])
    {
        hi = 2;
        mid = 0;
        lo = 1;
    }
    else if (src[0] > src[2])
    {
        hi = 1;
        mid = 0;
        lo = 2;
    }
    else
    {
        lo = 0;
        if (src[1] > src[2])
        {
            hi = 1;
            mid = 2;
        }
        else
        {
            hi = 2;
            mid = 1;
        }
    }

    max = dest[hi];
    min = dest[lo];

    if (max > min)
    {
        dest[mid] = (dest[mid] - min) * sat / (max - min);
        dest[hi] = sat;
        dest[lo] = 0;
    }
    else
    {
        dest[0] = 0;
        dest[1] = 0;
        dest[2] = 0;
    }
}
1106
1107/*
1108 * Hue:
1109 * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
1110 */
1111static inline void
1112blend_hsl_hue (uint32_t c[3],
1113               uint32_t dc[3],
1114               uint32_t da,
1115               uint32_t sc[3],
1116               uint32_t sa)
1117{
1118    c[0] = sc[0] * da;
1119    c[1] = sc[1] * da;
1120    c[2] = sc[2] * da;
1121    set_sat (c, c, SAT (dc) * sa);
1122    set_lum (c, c, sa * da, LUM (dc) * sa);
1123}
1124
1125PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
1126
1127/*
1128 * Saturation:
1129 * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
1130 */
1131static inline void
1132blend_hsl_saturation (uint32_t c[3],
1133                      uint32_t dc[3],
1134                      uint32_t da,
1135                      uint32_t sc[3],
1136                      uint32_t sa)
1137{
1138    c[0] = dc[0] * sa;
1139    c[1] = dc[1] * sa;
1140    c[2] = dc[2] * sa;
1141    set_sat (c, c, SAT (sc) * da);
1142    set_lum (c, c, sa * da, LUM (dc) * sa);
1143}
1144
1145PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
1146
1147/*
1148 * Color:
1149 * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
1150 */
1151static inline void
1152blend_hsl_color (uint32_t c[3],
1153                 uint32_t dc[3],
1154                 uint32_t da,
1155                 uint32_t sc[3],
1156                 uint32_t sa)
1157{
1158    c[0] = sc[0] * da;
1159    c[1] = sc[1] * da;
1160    c[2] = sc[2] * da;
1161    set_lum (c, c, sa * da, LUM (dc) * sa);
1162}
1163
1164PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
1165
1166/*
1167 * Luminosity:
1168 * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
1169 */
1170static inline void
1171blend_hsl_luminosity (uint32_t c[3],
1172                      uint32_t dc[3],
1173                      uint32_t da,
1174                      uint32_t sc[3],
1175                      uint32_t sa)
1176{
1177    c[0] = dc[0] * sa;
1178    c[1] = dc[1] * sa;
1179    c[2] = dc[2] * sa;
1180    set_lum (c, c, sa * da, LUM (sc) * da);
1181}
1182
1183PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
1184
1185#undef SAT
1186#undef LUM
1187#undef CH_MAX
1188#undef CH_MIN
1189#undef PDF_NON_SEPARABLE_BLEND_MODE
1190
1191/* All of the disjoint/conjoint composing functions
1192 *
1193 * The four entries in the first column indicate what source contributions
1194 * come from each of the four areas of the picture -- areas covered by neither
1195 * A nor B, areas covered only by A, areas covered only by B and finally
1196 * areas covered by both A and B.
1197 *
 *		Disjoint			Conjoint
 * Areas	Fa		Fb		Fa		Fb
1200 * (0,0,0,0)	0		0		0		0
1201 * (0,A,0,A)	1		0		1		0
1202 * (0,0,B,B)	0		1		0		1
1203 * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
1204 * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
1205 * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
1206 * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
1207 * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
1208 * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
1209 * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
1210 * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
1211 * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
1212 *
1213 * See  http://marc.info/?l=xfree-render&m=99792000027857&w=2  for more
1214 * information about these operators.
1215 */
1216
/*
 * Region selectors for the general disjoint/conjoint combiners.
 *
 * A is the source operand and B the destination.  The "out" bit of an
 * operand keeps the part of it not covered by the other operand, the
 * "in" bit keeps the part covered by both; both bits together keep the
 * operand unchanged.
 */
#define COMBINE_A_OUT 0x01
#define COMBINE_A_IN  0x02
#define COMBINE_B_OUT 0x04
#define COMBINE_B_IN  0x08

/* Operators expressed as combinations of the four region bits. */
#define COMBINE_CLEAR   0x00
#define COMBINE_A       (COMBINE_A_IN | COMBINE_A_OUT)
#define COMBINE_B       (COMBINE_B_IN | COMBINE_B_OUT)
#define COMBINE_A_OVER  (COMBINE_A_IN | COMBINE_A_OUT | COMBINE_B_OUT)
#define COMBINE_B_OVER  (COMBINE_B_IN | COMBINE_B_OUT | COMBINE_A_OUT)
#define COMBINE_A_ATOP  (COMBINE_A_IN | COMBINE_B_OUT)
#define COMBINE_B_ATOP  (COMBINE_B_IN | COMBINE_A_OUT)
#define COMBINE_XOR     (COMBINE_B_OUT | COMBINE_A_OUT)
1230
1231/* portion covered by a but not b */
1232static uint8_t
1233combine_disjoint_out_part (uint8_t a, uint8_t b)
1234{
1235    /* min (1, (1-b) / a) */
1236
1237    b = ~b;                 /* 1 - b */
1238    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
1239	return MASK;        /* 1 */
1240    return DIV_UN8 (b, a);     /* (1-b) / a */
1241}
1242
/*
 * Disjoint "in" factor: portion covered by both a and b.
 *
 * Evaluates max (1 - (1 - b) / a, 0), which equals
 * 1 - min (1, (1 - b) / a), on 8-bit values.
 */
static uint8_t
combine_disjoint_in_part (uint8_t a, uint8_t b)
{
    uint8_t room = ~b;      /* 1 - b */

    /* (1 - b) >= a means (1 - b) / a >= 1, giving 1 - 1 = 0; otherwise
     * the bitwise complement yields 1 - (1 - b) / a.
     */
    return (room >= a) ? 0 : ~DIV_UN8 (room, a);
}
1256
/*
 * Conjoint "out" factor: portion covered by a but not by b.
 *
 * Evaluates max (1 - b / a, 0), which equals 1 - min (b / a, 1), on
 * 8-bit values.
 */
static uint8_t
combine_conjoint_out_part (uint8_t a, uint8_t b)
{
    /* b >= a means b / a >= 1, so nothing of a is left outside b;
     * otherwise the bitwise complement yields 1 - b / a.
     */
    return (b >= a) ? 0x00 : ~DIV_UN8 (b, a);
}
1270
1271/* portion covered by both a and b */
1272static uint8_t
1273combine_conjoint_in_part (uint8_t a, uint8_t b)
1274{
1275    /* min (1,b/a) */
1276
1277    if (b >= a)             /* b >= a -> b/a >= 1 */
1278	return MASK;        /* 1 */
1279    return DIV_UN8 (b, a);     /* b/a */
1280}
1281
/*
 * GET_COMP (v, i): the 8-bit component of v that starts at bit i,
 * widened to uint16_t.  The shift count is parenthesized so that any
 * expression may be passed for i.
 */
#define GET_COMP(v, i)   ((uint16_t) (uint8_t) ((v) >> (i)))

/*
 * ADD (x, y, i, t): add the components of x and y found at bit i and
 * return the sum, moved back to bit i, as a uint32_t.  t is a scratch
 * lvalue.  When the sum carries into bit G_SHIFT, 0 - 1 sets every bit,
 * so the byte that is kept becomes all ones (a saturating add).
 */
#define ADD(x, y, i, t)							\
    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
     (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))

/*
 * GENERIC (x, y, i, ax, ay, t, u, v): for the components at bit i
 * compute y * ay + x * ax with MUL_UN8, saturate the sum the same way
 * ADD does, and return it moved back to bit i.  t, u and v are scratch
 * lvalues.
 */
#define GENERIC(x, y, i, ax, ay, t, u, v)				\
    ((t) = (MUL_UN8 (GET_COMP (y, i), (ay), (u)) +			\
            MUL_UN8 (GET_COMP (x, i), (ax), (v))),			\
     (uint32_t) ((uint8_t) ((t) |					\
                           (0 - ((t) >> G_SHIFT)))) << (i))
1293
1294static void
1295combine_disjoint_general_u (uint32_t *      dest,
1296                            const uint32_t *src,
1297                            const uint32_t *mask,
1298                            int            width,
1299                            uint8_t        combine)
1300{
1301    int i;
1302
1303    for (i = 0; i < width; ++i)
1304    {
1305	uint32_t s = combine_mask (src, mask, i);
1306	uint32_t d = *(dest + i);
1307	uint32_t m, n, o, p;
1308	uint16_t Fa, Fb, t, u, v;
1309	uint8_t sa = s >> A_SHIFT;
1310	uint8_t da = d >> A_SHIFT;
1311
1312	switch (combine & COMBINE_A)
1313	{
1314	default:
1315	    Fa = 0;
1316	    break;
1317
1318	case COMBINE_A_OUT:
1319	    Fa = combine_disjoint_out_part (sa, da);
1320	    break;
1321
1322	case COMBINE_A_IN:
1323	    Fa = combine_disjoint_in_part (sa, da);
1324	    break;
1325
1326	case COMBINE_A:
1327	    Fa = MASK;
1328	    break;
1329	}
1330
1331	switch (combine & COMBINE_B)
1332	{
1333	default:
1334	    Fb = 0;
1335	    break;
1336
1337	case COMBINE_B_OUT:
1338	    Fb = combine_disjoint_out_part (da, sa);
1339	    break;
1340
1341	case COMBINE_B_IN:
1342	    Fb = combine_disjoint_in_part (da, sa);
1343	    break;
1344
1345	case COMBINE_B:
1346	    Fb = MASK;
1347	    break;
1348	}
1349	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
1350	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
1351	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
1352	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
1353	s = m | n | o | p;
1354	*(dest + i) = s;
1355    }
1356}
1357
1358static void
1359combine_disjoint_over_u (pixman_implementation_t *imp,
1360                         pixman_op_t              op,
1361                         uint32_t *                dest,
1362                         const uint32_t *          src,
1363                         const uint32_t *          mask,
1364                         int                      width)
1365{
1366    int i;
1367
1368    for (i = 0; i < width; ++i)
1369    {
1370	uint32_t s = combine_mask (src, mask, i);
1371	uint16_t a = s >> A_SHIFT;
1372
1373	if (s != 0x00)
1374	{
1375	    uint32_t d = *(dest + i);
1376	    a = combine_disjoint_out_part (d >> A_SHIFT, a);
1377	    UN8x4_MUL_UN8_ADD_UN8x4 (d, a, s);
1378
1379	    *(dest + i) = d;
1380	}
1381    }
1382}
1383
1384static void
1385combine_disjoint_in_u (pixman_implementation_t *imp,
1386                       pixman_op_t              op,
1387                       uint32_t *                dest,
1388                       const uint32_t *          src,
1389                       const uint32_t *          mask,
1390                       int                      width)
1391{
1392    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
1393}
1394
1395static void
1396combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
1397                               pixman_op_t              op,
1398                               uint32_t *                dest,
1399                               const uint32_t *          src,
1400                               const uint32_t *          mask,
1401                               int                      width)
1402{
1403    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
1404}
1405
1406static void
1407combine_disjoint_out_u (pixman_implementation_t *imp,
1408                        pixman_op_t              op,
1409                        uint32_t *                dest,
1410                        const uint32_t *          src,
1411                        const uint32_t *          mask,
1412                        int                      width)
1413{
1414    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
1415}
1416
1417static void
1418combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
1419                                pixman_op_t              op,
1420                                uint32_t *                dest,
1421                                const uint32_t *          src,
1422                                const uint32_t *          mask,
1423                                int                      width)
1424{
1425    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
1426}
1427
1428static void
1429combine_disjoint_atop_u (pixman_implementation_t *imp,
1430                         pixman_op_t              op,
1431                         uint32_t *                dest,
1432                         const uint32_t *          src,
1433                         const uint32_t *          mask,
1434                         int                      width)
1435{
1436    combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
1437}
1438
1439static void
1440combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
1441                                 pixman_op_t              op,
1442                                 uint32_t *                dest,
1443                                 const uint32_t *          src,
1444                                 const uint32_t *          mask,
1445                                 int                      width)
1446{
1447    combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
1448}
1449
1450static void
1451combine_disjoint_xor_u (pixman_implementation_t *imp,
1452                        pixman_op_t              op,
1453                        uint32_t *                dest,
1454                        const uint32_t *          src,
1455                        const uint32_t *          mask,
1456                        int                      width)
1457{
1458    combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
1459}
1460
1461static void
1462combine_conjoint_general_u (uint32_t *      dest,
1463                            const uint32_t *src,
1464                            const uint32_t *mask,
1465                            int            width,
1466                            uint8_t        combine)
1467{
1468    int i;
1469
1470    for (i = 0; i < width; ++i)
1471    {
1472	uint32_t s = combine_mask (src, mask, i);
1473	uint32_t d = *(dest + i);
1474	uint32_t m, n, o, p;
1475	uint16_t Fa, Fb, t, u, v;
1476	uint8_t sa = s >> A_SHIFT;
1477	uint8_t da = d >> A_SHIFT;
1478
1479	switch (combine & COMBINE_A)
1480	{
1481	default:
1482	    Fa = 0;
1483	    break;
1484
1485	case COMBINE_A_OUT:
1486	    Fa = combine_conjoint_out_part (sa, da);
1487	    break;
1488
1489	case COMBINE_A_IN:
1490	    Fa = combine_conjoint_in_part (sa, da);
1491	    break;
1492
1493	case COMBINE_A:
1494	    Fa = MASK;
1495	    break;
1496	}
1497
1498	switch (combine & COMBINE_B)
1499	{
1500	default:
1501	    Fb = 0;
1502	    break;
1503
1504	case COMBINE_B_OUT:
1505	    Fb = combine_conjoint_out_part (da, sa);
1506	    break;
1507
1508	case COMBINE_B_IN:
1509	    Fb = combine_conjoint_in_part (da, sa);
1510	    break;
1511
1512	case COMBINE_B:
1513	    Fb = MASK;
1514	    break;
1515	}
1516
1517	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
1518	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
1519	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
1520	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
1521
1522	s = m | n | o | p;
1523
1524	*(dest + i) = s;
1525    }
1526}
1527
1528static void
1529combine_conjoint_over_u (pixman_implementation_t *imp,
1530                         pixman_op_t              op,
1531                         uint32_t *                dest,
1532                         const uint32_t *          src,
1533                         const uint32_t *          mask,
1534                         int                      width)
1535{
1536    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
1537}
1538
1539static void
1540combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
1541                                 pixman_op_t              op,
1542                                 uint32_t *                dest,
1543                                 const uint32_t *          src,
1544                                 const uint32_t *          mask,
1545                                 int                      width)
1546{
1547    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
1548}
1549
1550static void
1551combine_conjoint_in_u (pixman_implementation_t *imp,
1552                       pixman_op_t              op,
1553                       uint32_t *                dest,
1554                       const uint32_t *          src,
1555                       const uint32_t *          mask,
1556                       int                      width)
1557{
1558    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
1559}
1560
1561static void
1562combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
1563                               pixman_op_t              op,
1564                               uint32_t *                dest,
1565                               const uint32_t *          src,
1566                               const uint32_t *          mask,
1567                               int                      width)
1568{
1569    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
1570}
1571
1572static void
1573combine_conjoint_out_u (pixman_implementation_t *imp,
1574                        pixman_op_t              op,
1575                        uint32_t *                dest,
1576                        const uint32_t *          src,
1577                        const uint32_t *          mask,
1578                        int                      width)
1579{
1580    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
1581}
1582
1583static void
1584combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
1585                                pixman_op_t              op,
1586                                uint32_t *                dest,
1587                                const uint32_t *          src,
1588                                const uint32_t *          mask,
1589                                int                      width)
1590{
1591    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
1592}
1593
1594static void
1595combine_conjoint_atop_u (pixman_implementation_t *imp,
1596                         pixman_op_t              op,
1597                         uint32_t *                dest,
1598                         const uint32_t *          src,
1599                         const uint32_t *          mask,
1600                         int                      width)
1601{
1602    combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
1603}
1604
1605static void
1606combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
1607                                 pixman_op_t              op,
1608                                 uint32_t *                dest,
1609                                 const uint32_t *          src,
1610                                 const uint32_t *          mask,
1611                                 int                      width)
1612{
1613    combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
1614}
1615
1616static void
1617combine_conjoint_xor_u (pixman_implementation_t *imp,
1618                        pixman_op_t              op,
1619                        uint32_t *                dest,
1620                        const uint32_t *          src,
1621                        const uint32_t *          mask,
1622                        int                      width)
1623{
1624    combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
1625}
1626
1627
1628/* Component alpha combiners */
1629
1630static void
1631combine_clear_ca (pixman_implementation_t *imp,
1632                  pixman_op_t              op,
1633                  uint32_t *                dest,
1634                  const uint32_t *          src,
1635                  const uint32_t *          mask,
1636                  int                      width)
1637{
1638    memset (dest, 0, width * sizeof(uint32_t));
1639}
1640
1641static void
1642combine_src_ca (pixman_implementation_t *imp,
1643                pixman_op_t              op,
1644                uint32_t *                dest,
1645                const uint32_t *          src,
1646                const uint32_t *          mask,
1647                int                      width)
1648{
1649    int i;
1650
1651    for (i = 0; i < width; ++i)
1652    {
1653	uint32_t s = *(src + i);
1654	uint32_t m = *(mask + i);
1655
1656	combine_mask_value_ca (&s, &m);
1657
1658	*(dest + i) = s;
1659    }
1660}
1661
1662static void
1663combine_over_ca (pixman_implementation_t *imp,
1664                 pixman_op_t              op,
1665                 uint32_t *                dest,
1666                 const uint32_t *          src,
1667                 const uint32_t *          mask,
1668                 int                      width)
1669{
1670    int i;
1671
1672    for (i = 0; i < width; ++i)
1673    {
1674	uint32_t s = *(src + i);
1675	uint32_t m = *(mask + i);
1676	uint32_t a;
1677
1678	combine_mask_ca (&s, &m);
1679
1680	a = ~m;
1681	if (a)
1682	{
1683	    uint32_t d = *(dest + i);
1684	    UN8x4_MUL_UN8x4_ADD_UN8x4 (d, a, s);
1685	    s = d;
1686	}
1687
1688	*(dest + i) = s;
1689    }
1690}
1691
1692static void
1693combine_over_reverse_ca (pixman_implementation_t *imp,
1694                         pixman_op_t              op,
1695                         uint32_t *                dest,
1696                         const uint32_t *          src,
1697                         const uint32_t *          mask,
1698                         int                      width)
1699{
1700    int i;
1701
1702    for (i = 0; i < width; ++i)
1703    {
1704	uint32_t d = *(dest + i);
1705	uint32_t a = ~d >> A_SHIFT;
1706
1707	if (a)
1708	{
1709	    uint32_t s = *(src + i);
1710	    uint32_t m = *(mask + i);
1711
1712	    UN8x4_MUL_UN8x4 (s, m);
1713	    UN8x4_MUL_UN8_ADD_UN8x4 (s, a, d);
1714
1715	    *(dest + i) = s;
1716	}
1717    }
1718}
1719
1720static void
1721combine_in_ca (pixman_implementation_t *imp,
1722               pixman_op_t              op,
1723               uint32_t *                dest,
1724               const uint32_t *          src,
1725               const uint32_t *          mask,
1726               int                      width)
1727{
1728    int i;
1729
1730    for (i = 0; i < width; ++i)
1731    {
1732	uint32_t d = *(dest + i);
1733	uint16_t a = d >> A_SHIFT;
1734	uint32_t s = 0;
1735
1736	if (a)
1737	{
1738	    uint32_t m = *(mask + i);
1739
1740	    s = *(src + i);
1741	    combine_mask_value_ca (&s, &m);
1742
1743	    if (a != MASK)
1744		UN8x4_MUL_UN8 (s, a);
1745	}
1746
1747	*(dest + i) = s;
1748    }
1749}
1750
1751static void
1752combine_in_reverse_ca (pixman_implementation_t *imp,
1753                       pixman_op_t              op,
1754                       uint32_t *                dest,
1755                       const uint32_t *          src,
1756                       const uint32_t *          mask,
1757                       int                      width)
1758{
1759    int i;
1760
1761    for (i = 0; i < width; ++i)
1762    {
1763	uint32_t s = *(src + i);
1764	uint32_t m = *(mask + i);
1765	uint32_t a;
1766
1767	combine_mask_alpha_ca (&s, &m);
1768
1769	a = m;
1770	if (a != ~0)
1771	{
1772	    uint32_t d = 0;
1773
1774	    if (a)
1775	    {
1776		d = *(dest + i);
1777		UN8x4_MUL_UN8x4 (d, a);
1778	    }
1779
1780	    *(dest + i) = d;
1781	}
1782    }
1783}
1784
1785static void
1786combine_out_ca (pixman_implementation_t *imp,
1787                pixman_op_t              op,
1788                uint32_t *                dest,
1789                const uint32_t *          src,
1790                const uint32_t *          mask,
1791                int                      width)
1792{
1793    int i;
1794
1795    for (i = 0; i < width; ++i)
1796    {
1797	uint32_t d = *(dest + i);
1798	uint16_t a = ~d >> A_SHIFT;
1799	uint32_t s = 0;
1800
1801	if (a)
1802	{
1803	    uint32_t m = *(mask + i);
1804
1805	    s = *(src + i);
1806	    combine_mask_value_ca (&s, &m);
1807
1808	    if (a != MASK)
1809		UN8x4_MUL_UN8 (s, a);
1810	}
1811
1812	*(dest + i) = s;
1813    }
1814}
1815
1816static void
1817combine_out_reverse_ca (pixman_implementation_t *imp,
1818                        pixman_op_t              op,
1819                        uint32_t *                dest,
1820                        const uint32_t *          src,
1821                        const uint32_t *          mask,
1822                        int                      width)
1823{
1824    int i;
1825
1826    for (i = 0; i < width; ++i)
1827    {
1828	uint32_t s = *(src + i);
1829	uint32_t m = *(mask + i);
1830	uint32_t a;
1831
1832	combine_mask_alpha_ca (&s, &m);
1833
1834	a = ~m;
1835	if (a != ~0)
1836	{
1837	    uint32_t d = 0;
1838
1839	    if (a)
1840	    {
1841		d = *(dest + i);
1842		UN8x4_MUL_UN8x4 (d, a);
1843	    }
1844
1845	    *(dest + i) = d;
1846	}
1847    }
1848}
1849
1850static void
1851combine_atop_ca (pixman_implementation_t *imp,
1852                 pixman_op_t              op,
1853                 uint32_t *                dest,
1854                 const uint32_t *          src,
1855                 const uint32_t *          mask,
1856                 int                      width)
1857{
1858    int i;
1859
1860    for (i = 0; i < width; ++i)
1861    {
1862	uint32_t d = *(dest + i);
1863	uint32_t s = *(src + i);
1864	uint32_t m = *(mask + i);
1865	uint32_t ad;
1866	uint16_t as = d >> A_SHIFT;
1867
1868	combine_mask_ca (&s, &m);
1869
1870	ad = ~m;
1871
1872	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
1873
1874	*(dest + i) = d;
1875    }
1876}
1877
1878static void
1879combine_atop_reverse_ca (pixman_implementation_t *imp,
1880                         pixman_op_t              op,
1881                         uint32_t *                dest,
1882                         const uint32_t *          src,
1883                         const uint32_t *          mask,
1884                         int                      width)
1885{
1886    int i;
1887
1888    for (i = 0; i < width; ++i)
1889    {
1890	uint32_t d = *(dest + i);
1891	uint32_t s = *(src + i);
1892	uint32_t m = *(mask + i);
1893	uint32_t ad;
1894	uint16_t as = ~d >> A_SHIFT;
1895
1896	combine_mask_ca (&s, &m);
1897
1898	ad = m;
1899
1900	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
1901
1902	*(dest + i) = d;
1903    }
1904}
1905
1906static void
1907combine_xor_ca (pixman_implementation_t *imp,
1908                pixman_op_t              op,
1909                uint32_t *                dest,
1910                const uint32_t *          src,
1911                const uint32_t *          mask,
1912                int                      width)
1913{
1914    int i;
1915
1916    for (i = 0; i < width; ++i)
1917    {
1918	uint32_t d = *(dest + i);
1919	uint32_t s = *(src + i);
1920	uint32_t m = *(mask + i);
1921	uint32_t ad;
1922	uint16_t as = ~d >> A_SHIFT;
1923
1924	combine_mask_ca (&s, &m);
1925
1926	ad = ~m;
1927
1928	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ad, s, as);
1929
1930	*(dest + i) = d;
1931    }
1932}
1933
1934static void
1935combine_add_ca (pixman_implementation_t *imp,
1936                pixman_op_t              op,
1937                uint32_t *                dest,
1938                const uint32_t *          src,
1939                const uint32_t *          mask,
1940                int                      width)
1941{
1942    int i;
1943
1944    for (i = 0; i < width; ++i)
1945    {
1946	uint32_t s = *(src + i);
1947	uint32_t m = *(mask + i);
1948	uint32_t d = *(dest + i);
1949
1950	combine_mask_value_ca (&s, &m);
1951
1952	UN8x4_ADD_UN8x4 (d, s);
1953
1954	*(dest + i) = d;
1955    }
1956}
1957
1958static void
1959combine_saturate_ca (pixman_implementation_t *imp,
1960                     pixman_op_t              op,
1961                     uint32_t *                dest,
1962                     const uint32_t *          src,
1963                     const uint32_t *          mask,
1964                     int                      width)
1965{
1966    int i;
1967
1968    for (i = 0; i < width; ++i)
1969    {
1970	uint32_t s, d;
1971	uint16_t sa, sr, sg, sb, da;
1972	uint16_t t, u, v;
1973	uint32_t m, n, o, p;
1974
1975	d = *(dest + i);
1976	s = *(src + i);
1977	m = *(mask + i);
1978
1979	combine_mask_ca (&s, &m);
1980
1981	sa = (m >> A_SHIFT);
1982	sr = (m >> R_SHIFT) & MASK;
1983	sg = (m >> G_SHIFT) & MASK;
1984	sb =  m             & MASK;
1985	da = ~d >> A_SHIFT;
1986
1987	if (sb <= da)
1988	    m = ADD (s, d, 0, t);
1989	else
1990	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
1991
1992	if (sg <= da)
1993	    n = ADD (s, d, G_SHIFT, t);
1994	else
1995	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
1996
1997	if (sr <= da)
1998	    o = ADD (s, d, R_SHIFT, t);
1999	else
2000	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
2001
2002	if (sa <= da)
2003	    p = ADD (s, d, A_SHIFT, t);
2004	else
2005	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
2006
2007	*(dest + i) = m | n | o | p;
2008    }
2009}
2010
2011static void
2012combine_disjoint_general_ca (uint32_t *      dest,
2013                             const uint32_t *src,
2014                             const uint32_t *mask,
2015                             int            width,
2016                             uint8_t        combine)
2017{
2018    int i;
2019
2020    for (i = 0; i < width; ++i)
2021    {
2022	uint32_t s, d;
2023	uint32_t m, n, o, p;
2024	uint32_t Fa, Fb;
2025	uint16_t t, u, v;
2026	uint32_t sa;
2027	uint8_t da;
2028
2029	s = *(src + i);
2030	m = *(mask + i);
2031	d = *(dest + i);
2032	da = d >> A_SHIFT;
2033
2034	combine_mask_ca (&s, &m);
2035
2036	sa = m;
2037
2038	switch (combine & COMBINE_A)
2039	{
2040	default:
2041	    Fa = 0;
2042	    break;
2043
2044	case COMBINE_A_OUT:
2045	    m = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> 0), da);
2046	    n = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
2047	    o = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
2048	    p = (uint32_t)combine_disjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
2049	    Fa = m | n | o | p;
2050	    break;
2051
2052	case COMBINE_A_IN:
2053	    m = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> 0), da);
2054	    n = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
2055	    o = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
2056	    p = (uint32_t)combine_disjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
2057	    Fa = m | n | o | p;
2058	    break;
2059
2060	case COMBINE_A:
2061	    Fa = ~0;
2062	    break;
2063	}
2064
2065	switch (combine & COMBINE_B)
2066	{
2067	default:
2068	    Fb = 0;
2069	    break;
2070
2071	case COMBINE_B_OUT:
2072	    m = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> 0));
2073	    n = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
2074	    o = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
2075	    p = (uint32_t)combine_disjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
2076	    Fb = m | n | o | p;
2077	    break;
2078
2079	case COMBINE_B_IN:
2080	    m = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> 0));
2081	    n = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
2082	    o = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
2083	    p = (uint32_t)combine_disjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
2084	    Fb = m | n | o | p;
2085	    break;
2086
2087	case COMBINE_B:
2088	    Fb = ~0;
2089	    break;
2090	}
2091	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
2092	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
2093	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
2094	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
2095
2096	s = m | n | o | p;
2097
2098	*(dest + i) = s;
2099    }
2100}
2101
2102static void
2103combine_disjoint_over_ca (pixman_implementation_t *imp,
2104                          pixman_op_t              op,
2105                          uint32_t *                dest,
2106                          const uint32_t *          src,
2107                          const uint32_t *          mask,
2108                          int                      width)
2109{
2110    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
2111}
2112
2113static void
2114combine_disjoint_in_ca (pixman_implementation_t *imp,
2115                        pixman_op_t              op,
2116                        uint32_t *                dest,
2117                        const uint32_t *          src,
2118                        const uint32_t *          mask,
2119                        int                      width)
2120{
2121    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
2122}
2123
2124static void
2125combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
2126                                pixman_op_t              op,
2127                                uint32_t *                dest,
2128                                const uint32_t *          src,
2129                                const uint32_t *          mask,
2130                                int                      width)
2131{
2132    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
2133}
2134
2135static void
2136combine_disjoint_out_ca (pixman_implementation_t *imp,
2137                         pixman_op_t              op,
2138                         uint32_t *                dest,
2139                         const uint32_t *          src,
2140                         const uint32_t *          mask,
2141                         int                      width)
2142{
2143    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
2144}
2145
2146static void
2147combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
2148                                 pixman_op_t              op,
2149                                 uint32_t *                dest,
2150                                 const uint32_t *          src,
2151                                 const uint32_t *          mask,
2152                                 int                      width)
2153{
2154    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
2155}
2156
2157static void
2158combine_disjoint_atop_ca (pixman_implementation_t *imp,
2159                          pixman_op_t              op,
2160                          uint32_t *                dest,
2161                          const uint32_t *          src,
2162                          const uint32_t *          mask,
2163                          int                      width)
2164{
2165    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
2166}
2167
2168static void
2169combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
2170                                  pixman_op_t              op,
2171                                  uint32_t *                dest,
2172                                  const uint32_t *          src,
2173                                  const uint32_t *          mask,
2174                                  int                      width)
2175{
2176    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
2177}
2178
2179static void
2180combine_disjoint_xor_ca (pixman_implementation_t *imp,
2181                         pixman_op_t              op,
2182                         uint32_t *                dest,
2183                         const uint32_t *          src,
2184                         const uint32_t *          mask,
2185                         int                      width)
2186{
2187    combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
2188}
2189
2190static void
2191combine_conjoint_general_ca (uint32_t *      dest,
2192                             const uint32_t *src,
2193                             const uint32_t *mask,
2194                             int            width,
2195                             uint8_t        combine)
2196{
2197    int i;
2198
2199    for (i = 0; i < width; ++i)
2200    {
2201	uint32_t s, d;
2202	uint32_t m, n, o, p;
2203	uint32_t Fa, Fb;
2204	uint16_t t, u, v;
2205	uint32_t sa;
2206	uint8_t da;
2207
2208	s = *(src + i);
2209	m = *(mask + i);
2210	d = *(dest + i);
2211	da = d >> A_SHIFT;
2212
2213	combine_mask_ca (&s, &m);
2214
2215	sa = m;
2216
2217	switch (combine & COMBINE_A)
2218	{
2219	default:
2220	    Fa = 0;
2221	    break;
2222
2223	case COMBINE_A_OUT:
2224	    m = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> 0), da);
2225	    n = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
2226	    o = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
2227	    p = (uint32_t)combine_conjoint_out_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
2228	    Fa = m | n | o | p;
2229	    break;
2230
2231	case COMBINE_A_IN:
2232	    m = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> 0), da);
2233	    n = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> G_SHIFT), da) << G_SHIFT;
2234	    o = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> R_SHIFT), da) << R_SHIFT;
2235	    p = (uint32_t)combine_conjoint_in_part ((uint8_t) (sa >> A_SHIFT), da) << A_SHIFT;
2236	    Fa = m | n | o | p;
2237	    break;
2238
2239	case COMBINE_A:
2240	    Fa = ~0;
2241	    break;
2242	}
2243
2244	switch (combine & COMBINE_B)
2245	{
2246	default:
2247	    Fb = 0;
2248	    break;
2249
2250	case COMBINE_B_OUT:
2251	    m = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> 0));
2252	    n = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
2253	    o = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
2254	    p = (uint32_t)combine_conjoint_out_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
2255	    Fb = m | n | o | p;
2256	    break;
2257
2258	case COMBINE_B_IN:
2259	    m = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> 0));
2260	    n = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> G_SHIFT)) << G_SHIFT;
2261	    o = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> R_SHIFT)) << R_SHIFT;
2262	    p = (uint32_t)combine_conjoint_in_part (da, (uint8_t) (sa >> A_SHIFT)) << A_SHIFT;
2263	    Fb = m | n | o | p;
2264	    break;
2265
2266	case COMBINE_B:
2267	    Fb = ~0;
2268	    break;
2269	}
2270	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
2271	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
2272	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
2273	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
2274
2275	s = m | n | o | p;
2276
2277	*(dest + i) = s;
2278    }
2279}
2280
2281static void
2282combine_conjoint_over_ca (pixman_implementation_t *imp,
2283                          pixman_op_t              op,
2284                          uint32_t *                dest,
2285                          const uint32_t *          src,
2286                          const uint32_t *          mask,
2287                          int                      width)
2288{
2289    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
2290}
2291
2292static void
2293combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
2294                                  pixman_op_t              op,
2295                                  uint32_t *                dest,
2296                                  const uint32_t *          src,
2297                                  const uint32_t *          mask,
2298                                  int                      width)
2299{
2300    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
2301}
2302
2303static void
2304combine_conjoint_in_ca (pixman_implementation_t *imp,
2305                        pixman_op_t              op,
2306                        uint32_t *                dest,
2307                        const uint32_t *          src,
2308                        const uint32_t *          mask,
2309                        int                      width)
2310{
2311    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
2312}
2313
2314static void
2315combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
2316                                pixman_op_t              op,
2317                                uint32_t *                dest,
2318                                const uint32_t *          src,
2319                                const uint32_t *          mask,
2320                                int                      width)
2321{
2322    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
2323}
2324
2325static void
2326combine_conjoint_out_ca (pixman_implementation_t *imp,
2327                         pixman_op_t              op,
2328                         uint32_t *                dest,
2329                         const uint32_t *          src,
2330                         const uint32_t *          mask,
2331                         int                      width)
2332{
2333    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
2334}
2335
2336static void
2337combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
2338                                 pixman_op_t              op,
2339                                 uint32_t *                dest,
2340                                 const uint32_t *          src,
2341                                 const uint32_t *          mask,
2342                                 int                      width)
2343{
2344    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
2345}
2346
2347static void
2348combine_conjoint_atop_ca (pixman_implementation_t *imp,
2349                          pixman_op_t              op,
2350                          uint32_t *                dest,
2351                          const uint32_t *          src,
2352                          const uint32_t *          mask,
2353                          int                      width)
2354{
2355    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
2356}
2357
2358static void
2359combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
2360                                  pixman_op_t              op,
2361                                  uint32_t *                dest,
2362                                  const uint32_t *          src,
2363                                  const uint32_t *          mask,
2364                                  int                      width)
2365{
2366    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
2367}
2368
2369static void
2370combine_conjoint_xor_ca (pixman_implementation_t *imp,
2371                         pixman_op_t              op,
2372                         uint32_t *                dest,
2373                         const uint32_t *          src,
2374                         const uint32_t *          mask,
2375                         int                      width)
2376{
2377    combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
2378}
2379
2380void
2381_pixman_setup_combiner_functions_32 (pixman_implementation_t *imp)
2382{
2383    /* Unified alpha */
2384    imp->combine_32[PIXMAN_OP_CLEAR] = combine_clear;
2385    imp->combine_32[PIXMAN_OP_SRC] = combine_src_u;
2386    imp->combine_32[PIXMAN_OP_DST] = combine_dst;
2387    imp->combine_32[PIXMAN_OP_OVER] = combine_over_u;
2388    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_u;
2389    imp->combine_32[PIXMAN_OP_IN] = combine_in_u;
2390    imp->combine_32[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_u;
2391    imp->combine_32[PIXMAN_OP_OUT] = combine_out_u;
2392    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_u;
2393    imp->combine_32[PIXMAN_OP_ATOP] = combine_atop_u;
2394    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_u;
2395    imp->combine_32[PIXMAN_OP_XOR] = combine_xor_u;
2396    imp->combine_32[PIXMAN_OP_ADD] = combine_add_u;
2397    imp->combine_32[PIXMAN_OP_SATURATE] = combine_saturate_u;
2398
2399    /* Disjoint, unified */
2400    imp->combine_32[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear;
2401    imp->combine_32[PIXMAN_OP_DISJOINT_SRC] = combine_src_u;
2402    imp->combine_32[PIXMAN_OP_DISJOINT_DST] = combine_dst;
2403    imp->combine_32[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_u;
2404    imp->combine_32[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_u;
2405    imp->combine_32[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_u;
2406    imp->combine_32[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_u;
2407    imp->combine_32[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_u;
2408    imp->combine_32[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_u;
2409    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_u;
2410    imp->combine_32[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_u;
2411    imp->combine_32[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_u;
2412
2413    /* Conjoint, unified */
2414    imp->combine_32[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear;
2415    imp->combine_32[PIXMAN_OP_CONJOINT_SRC] = combine_src_u;
2416    imp->combine_32[PIXMAN_OP_CONJOINT_DST] = combine_dst;
2417    imp->combine_32[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_u;
2418    imp->combine_32[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_u;
2419    imp->combine_32[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_u;
2420    imp->combine_32[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_u;
2421    imp->combine_32[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_u;
2422    imp->combine_32[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_u;
2423    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_u;
2424    imp->combine_32[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_u;
2425    imp->combine_32[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_u;
2426
2427    imp->combine_32[PIXMAN_OP_MULTIPLY] = combine_multiply_u;
2428    imp->combine_32[PIXMAN_OP_SCREEN] = combine_screen_u;
2429    imp->combine_32[PIXMAN_OP_OVERLAY] = combine_overlay_u;
2430    imp->combine_32[PIXMAN_OP_DARKEN] = combine_darken_u;
2431    imp->combine_32[PIXMAN_OP_LIGHTEN] = combine_lighten_u;
2432    imp->combine_32[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_u;
2433    imp->combine_32[PIXMAN_OP_COLOR_BURN] = combine_color_burn_u;
2434    imp->combine_32[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_u;
2435    imp->combine_32[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_u;
2436    imp->combine_32[PIXMAN_OP_DIFFERENCE] = combine_difference_u;
2437    imp->combine_32[PIXMAN_OP_EXCLUSION] = combine_exclusion_u;
2438    imp->combine_32[PIXMAN_OP_HSL_HUE] = combine_hsl_hue_u;
2439    imp->combine_32[PIXMAN_OP_HSL_SATURATION] = combine_hsl_saturation_u;
2440    imp->combine_32[PIXMAN_OP_HSL_COLOR] = combine_hsl_color_u;
2441    imp->combine_32[PIXMAN_OP_HSL_LUMINOSITY] = combine_hsl_luminosity_u;
2442
2443    /* Component alpha combiners */
2444    imp->combine_32_ca[PIXMAN_OP_CLEAR] = combine_clear_ca;
2445    imp->combine_32_ca[PIXMAN_OP_SRC] = combine_src_ca;
2446    /* dest */
2447    imp->combine_32_ca[PIXMAN_OP_OVER] = combine_over_ca;
2448    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = combine_over_reverse_ca;
2449    imp->combine_32_ca[PIXMAN_OP_IN] = combine_in_ca;
2450    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = combine_in_reverse_ca;
2451    imp->combine_32_ca[PIXMAN_OP_OUT] = combine_out_ca;
2452    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = combine_out_reverse_ca;
2453    imp->combine_32_ca[PIXMAN_OP_ATOP] = combine_atop_ca;
2454    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = combine_atop_reverse_ca;
2455    imp->combine_32_ca[PIXMAN_OP_XOR] = combine_xor_ca;
2456    imp->combine_32_ca[PIXMAN_OP_ADD] = combine_add_ca;
2457    imp->combine_32_ca[PIXMAN_OP_SATURATE] = combine_saturate_ca;
2458
2459    /* Disjoint CA */
2460    imp->combine_32_ca[PIXMAN_OP_DISJOINT_CLEAR] = combine_clear_ca;
2461    imp->combine_32_ca[PIXMAN_OP_DISJOINT_SRC] = combine_src_ca;
2462    imp->combine_32_ca[PIXMAN_OP_DISJOINT_DST] = combine_dst;
2463    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER] = combine_disjoint_over_ca;
2464    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OVER_REVERSE] = combine_saturate_ca;
2465    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN] = combine_disjoint_in_ca;
2466    imp->combine_32_ca[PIXMAN_OP_DISJOINT_IN_REVERSE] = combine_disjoint_in_reverse_ca;
2467    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT] = combine_disjoint_out_ca;
2468    imp->combine_32_ca[PIXMAN_OP_DISJOINT_OUT_REVERSE] = combine_disjoint_out_reverse_ca;
2469    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP] = combine_disjoint_atop_ca;
2470    imp->combine_32_ca[PIXMAN_OP_DISJOINT_ATOP_REVERSE] = combine_disjoint_atop_reverse_ca;
2471    imp->combine_32_ca[PIXMAN_OP_DISJOINT_XOR] = combine_disjoint_xor_ca;
2472
2473    /* Conjoint CA */
2474    imp->combine_32_ca[PIXMAN_OP_CONJOINT_CLEAR] = combine_clear_ca;
2475    imp->combine_32_ca[PIXMAN_OP_CONJOINT_SRC] = combine_src_ca;
2476    imp->combine_32_ca[PIXMAN_OP_CONJOINT_DST] = combine_dst;
2477    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER] = combine_conjoint_over_ca;
2478    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OVER_REVERSE] = combine_conjoint_over_reverse_ca;
2479    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN] = combine_conjoint_in_ca;
2480    imp->combine_32_ca[PIXMAN_OP_CONJOINT_IN_REVERSE] = combine_conjoint_in_reverse_ca;
2481    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT] = combine_conjoint_out_ca;
2482    imp->combine_32_ca[PIXMAN_OP_CONJOINT_OUT_REVERSE] = combine_conjoint_out_reverse_ca;
2483    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP] = combine_conjoint_atop_ca;
2484    imp->combine_32_ca[PIXMAN_OP_CONJOINT_ATOP_REVERSE] = combine_conjoint_atop_reverse_ca;
2485    imp->combine_32_ca[PIXMAN_OP_CONJOINT_XOR] = combine_conjoint_xor_ca;
2486
2487    imp->combine_32_ca[PIXMAN_OP_MULTIPLY] = combine_multiply_ca;
2488    imp->combine_32_ca[PIXMAN_OP_SCREEN] = combine_screen_ca;
2489    imp->combine_32_ca[PIXMAN_OP_OVERLAY] = combine_overlay_ca;
2490    imp->combine_32_ca[PIXMAN_OP_DARKEN] = combine_darken_ca;
2491    imp->combine_32_ca[PIXMAN_OP_LIGHTEN] = combine_lighten_ca;
2492    imp->combine_32_ca[PIXMAN_OP_COLOR_DODGE] = combine_color_dodge_ca;
2493    imp->combine_32_ca[PIXMAN_OP_COLOR_BURN] = combine_color_burn_ca;
2494    imp->combine_32_ca[PIXMAN_OP_HARD_LIGHT] = combine_hard_light_ca;
2495    imp->combine_32_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
2496    imp->combine_32_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
2497    imp->combine_32_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
2498
2499    /* It is not clear that these make sense, so make them noops for now */
2500    imp->combine_32_ca[PIXMAN_OP_HSL_HUE] = combine_dst;
2501    imp->combine_32_ca[PIXMAN_OP_HSL_SATURATION] = combine_dst;
2502    imp->combine_32_ca[PIXMAN_OP_HSL_COLOR] = combine_dst;
2503    imp->combine_32_ca[PIXMAN_OP_HSL_LUMINOSITY] = combine_dst;
2504}
2505