17d39c1ae76cc7dc6793980fd83db100399ee9179Brian	;
/*
 * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
 */
59add9a21d8c51ee4238169265541fa9a40f0a8b0Brian Paul
6afb833d4e89c312460a4ab9ed6a7a8ca4ebbfe1cjtg
7462183fe4cb6df6d90632d9e2cee881c8d26b1cbAlan Hourihane#ifdef USE_MMX_ASM
83474e9de924d92a941b4ea33ecc694f5fad2651fJosé Fonseca#include "assyntax.h"
9462183fe4cb6df6d90632d9e2cee881c8d26b1cbAlan Hourihane#include "matypes.h"
1055d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Integer multiplication - alpha plus one
 *
 * Uses the following approximation to the division by 255 (Sree)
 *
 *   rgb*a/255 ~= (rgb*(a + 1)) >> 8
 *
 * which is the fastest method that satisfies the following OpenGL criteria
 *
 *   0*0 = 0 and 255*255 = 255
 *
 * MP1/MP2 hold the colors and are only read; MA1/MA2 hold the alphas on
 * entry and the scaled colors on exit.
 *
 * MX1 is a register with the 0xffffffffffffffff constant (-1 in every word,
 * so subtracting it adds one), which can be easily obtained with
 *
 *   PCMPEQW    ( MX1, MX1 )
 */
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
    PSUBW      ( MX1, MA1 )             /* a1 - (-1) = a1 + 1, in every word */         ;\
    PMULLW     ( MP1, MA1 )             /* t1 = p1*(a1 + 1) */                          ;\
                                                                                        ;\
TWO(PSUBW      ( MX1, MA2 ))            /* a2 - (-1) = a2 + 1, in every word */         ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = p2*(a2 + 1) */                          ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 ~= p1*a1/255 */                      ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 ~= p2*a2/255 */
3455d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
3555d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Integer multiplication - geometric series
 *
 * The division by 255 expands into the geometric series
 *
 *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 * of which only the first two terms are kept, so that everything fits in
 * 16-bit arithmetic
 *
 *   t/255 ~= (t + (t >> 8)) >> 8
 *
 * Note that by itself this does not satisfy the OpenGL criteria, as
 * 255*255 yields 254, so the special case a = 255 must be accounted for or
 * roundoff must be used.
 *
 * MA1/MA2 hold the alphas on entry and the results on exit; MP1/MP2 hold the
 * colors on entry and are overwritten with the intermediate products.
 */
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
    PMULLW     ( MP1, MA1 )             /* t1 = p1*a1 */                                ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = p2*a2 */                                ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )             /* keep a copy of t1 */                         ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 */                                   ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))            /* keep a copy of t2 */                         ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 */                                   ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )             /* t1 + (t1 >> 8) ~= (t1/255) << 8 */           ;\
    PSRLW      ( CONST(8), MA1 )        /* sa1 | sb1 | sg1 | sr1 */                     ;\
                                                                                        ;\
TWO(PADDW      ( MP2, MA2 ))            /* t2 + (t2 >> 8) ~= (t2/255) << 8 */           ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* sa2 | sb2 | sg2 | sr2 */
6455d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
6555d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Integer multiplication - geometric series plus rounding
 *
 * Same geometric series division as GMB_MULT_GS, but instead of truncating
 * the result the product is rounded first (Jim Blinn)
 *
 *   t = rgb*a + 0x80
 *
 * which achieves the exact results.
 *
 * M80 is a register with the 0x0080008000800080 constant (0x80 in every
 * word).  MA1/MA2 hold the alphas on entry and the results on exit; MP1/MP2
 * hold the colors on entry and are overwritten with intermediate values.
 */
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
    PMULLW     ( MP1, MA1 )             /* t1 = p1*a1 */                                ;\
    PADDW      ( M80, MA1 )             /* t1 += 0x80 */                                ;\
                                                                                        ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = p2*a2 */                                ;\
TWO(PADDW      ( M80, MA2 ))            /* t2 += 0x80 */                                ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )             /* keep a copy of t1 */                         ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 */                                   ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))            /* keep a copy of t2 */                         ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 */                                   ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )             /* t1 + (t1 >> 8) ~= (t1/255) << 8 */           ;\
    PSRLW      ( CONST(8), MA1 )        /* sa1 | sb1 | sg1 | sr1 */                     ;\
                                                                                        ;\
TWO(PADDW      ( MP2, MA2 ))            /* t2 + (t2 >> 8) ~= (t2/255) << 8 */           ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* sa2 | sb2 | sg2 | sr2 */
9555d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
9655d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Linear interpolation - geometric series
 *
 * Computes, for every 16-bit word,
 *
 *   t = (p - q)*a
 *   s = (t + (t >> 8) + (q << 8)) >> 8
 *
 * i.e. q + (p - q)*a/255, with the division approximated by the first two
 * terms of the geometric series and no rounding.
 *
 * MA1/MA2 hold the alphas on entry and the results on exit.  MP1/MP2 and
 * MQ1/MQ2 are overwritten with intermediate values.
 */
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )             /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )        /* q1 << 8 */                                   ;\
    PMULLW     ( MP1, MA1 )             /* t1 = (p1 - q1)*pa1 */                        ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))            /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))       /* q2 << 8 */                                   ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = (p2 - q2)*pa2 */                        ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )             /* keep a copy of t1 */                         ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 */                                   ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))            /* keep a copy of t2 */                         ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 */                                   ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )             /* t1 + (t1 >> 8) ~= (t1/255) << 8 */           ;\
TWO(PADDW      ( MP2, MA2 ))            /* t2 + (t2 >> 8) ~= (t2/255) << 8 */           ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )             /* (t1/255 + q1) << 8 */                        ;\
TWO(PADDW      ( MQ2, MA2 ))            /* (t2/255 + q2) << 8 */                        ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )        /* sa1 | sb1 | sg1 | sr1 */                     ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* sa2 | sb2 | sg2 | sr2 */
12255d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
12355d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Linear interpolation - geometric series with roundoff
 *
 * This is a generalization of Blinn's formula to signed arithmetic: the
 * difference p - q can be negative, in which case 0x100 is subtracted from
 * the product before the usual 0x80 rounding term is added.
 *
 * M80 is a register with the 0x0080008000800080 constant (0x80 in every
 * word).  MA1/MA2 hold the alphas on entry and the results on exit.  MP1/MP2
 * and MQ1/MQ2 are overwritten with intermediate values.
 */
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
    PSUBW      ( MQ1, MP1 )             /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )        /* q1 << 8 */                                   ;\
    PMULLW     ( MP1, MA1 )             /* t1 = (p1 - q1)*pa1 */                        ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))            /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))       /* q2 << 8 */                                   ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = (p2 - q2)*pa2 */                        ;\
                                                                                        ;\
    PSRLW      ( CONST(15), MP1 )       /* sign bit of p1 - q1: q1 > p1 ? 1 : 0 */      ;\
TWO(PSRLW      ( CONST(15), MP2 ))      /* sign bit of p2 - q2: q2 > p2 ? 1 : 0 */      ;\
                                                                                        ;\
    PSLLW      ( CONST(8), MP1 )        /* q1 > p1 ? 0x100 : 0 */                       ;\
TWO(PSLLW      ( CONST(8), MP2 ))       /* q2 > p2 ? 0x100 : 0 */                       ;\
                                                                                        ;\
    PSUBW      ( MP1, MA1 )             /* t1 -= 0x100 when q1 > p1 */                  ;\
TWO(PSUBW      ( MP2, MA2 ))            /* t2 -= 0x100 when q2 > p2 */                  ;\
                                                                                        ;\
    PADDW      ( M80, MA1 )             /* t1 += 0x80 */                                ;\
TWO(PADDW      ( M80, MA2 ))            /* t2 += 0x80 */                                ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )             /* keep a copy of t1 */                         ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 */                                   ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))            /* keep a copy of t2 */                         ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 */                                   ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )             /* t1 + (t1 >> 8) ~= (t1/255) << 8 */           ;\
TWO(PADDW      ( MP2, MA2 ))            /* t2 + (t2 >> 8) ~= (t2/255) << 8 */           ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )             /* (t1/255 + q1) << 8 */                        ;\
TWO(PADDW      ( MQ2, MA2 ))            /* (t2/255 + q2) << 8 */                        ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )        /* sa1 | sb1 | sg1 | sr1 */                     ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* sa2 | sb2 | sg2 | sr2 */
16555d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
16655d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Linear interpolation - geometric series with correction
 *
 * Instead of the roundoff this adds a small correction term to satisfy the
 * OpenGL criteria
 *
 *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
 *
 * Note that although it is faster than rounding off, it does not always give
 * the exact results.
 *
 * MA1/MA2 hold the alphas on entry and the results on exit.  MP1/MP2 and
 * MQ1/MQ2 are overwritten with intermediate values.
 */
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )             /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )        /* q1 << 8 */                                   ;\
    PMULLW     ( MP1, MA1 )             /* t1 = (p1 - q1)*pa1 */                        ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))            /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))       /* q2 << 8 */                                   ;\
TWO(PMULLW     ( MP2, MA2 ))            /* t2 = (p2 - q2)*pa2 */                        ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )             /* keep a copy of t1 */                         ;\
    PSRLW      ( CONST(8), MA1 )        /* t1 >> 8 */                                   ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))            /* keep a copy of t2 */                         ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* t2 >> 8 */                                   ;\
                                                                                        ;\
    PADDW      ( MA1, MP1 )             /* MP1 = t1 + (t1 >> 8) ~= (t1/255) << 8 */     ;\
    PSRLW      ( CONST(7), MA1 )        /* (t1 >> 8) >> 7 = t1 >> 15 */                 ;\
                                                                                        ;\
TWO(PADDW      ( MA2, MP2 ))            /* MP2 = t2 + (t2 >> 8) ~= (t2/255) << 8 */     ;\
TWO(PSRLW      ( CONST(7), MA2 ))       /* (t2 >> 8) >> 7 = t2 >> 15 */                 ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )             /* t1 + (t1 >> 8) + (t1 >> 15) ~= (t1/255) << 8 */ ;\
TWO(PADDW      ( MP2, MA2 ))            /* t2 + (t2 >> 8) + (t2 >> 15) ~= (t2/255) << 8 */ ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )             /* (t1/255 + q1) << 8 */                        ;\
TWO(PADDW      ( MQ2, MA2 ))            /* (t2/255 + q2) << 8 */                        ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )        /* sa1 | sb1 | sg1 | sr1 */                     ;\
TWO(PSRLW      ( CONST(8), MA2 ))       /* sa2 | sb2 | sg2 | sr2 */
20455d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
20555d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Common blending setup code
 *
 * GMB_LOAD fetches the packed bytes at rgba into MPP and the packed bytes at
 * dest into MQQ.  The ONE() lines move 32 bits and the TWO() lines move
 * 64 bits; ONE and TWO are defined outside this section (presumably they
 * select the one-pixel or the two-pixel variant - confirm in the including
 * template).
 *
 * The M00 register taken by GMB_UNPACK holds the 0x0000000000000000
 * constant, which can be easily obtained with
 *
 *   PXOR      ( M00, M00 )
 */
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
ONE(MOVD       ( REGIND(rgba), MPP ))   /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MQQ ))   /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
                                                                                        ;\
TWO(MOVQ       ( REGIND(rgba), MPP ))   /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(MOVQ       ( REGIND(dest), MQQ ))   /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
218533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
/* Widen the packed bytes in MP1 and MQ1 to 16-bit words by interleaving them
 * with the zero register M00.  The low four bytes stay in MP1/MQ1; in the
 * TWO() variant the high four bytes go to MP2/MQ2, which is why the copies
 * must be taken before MP1/MQ1 are unpacked in place.
 */
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
TWO(MOVQ       ( MP1, MP2 ))            /* copy p before it is unpacked in place */     ;\
TWO(MOVQ       ( MQ1, MQ2 ))            /* copy q before it is unpacked in place */     ;\
                                                                                        ;\
    PUNPCKLBW  ( M00, MQ1 )             /* qa1 | qb1 | qg1 | qr1 */                     ;\
TWO(PUNPCKHBW  ( M00, MQ2 ))            /* qa2 | qb2 | qg2 | qr2 */                     ;\
    PUNPCKLBW  ( M00, MP1 )             /* pa1 | pb1 | pg1 | pr1 */                     ;\
TWO(PUNPCKHBW  ( M00, MP2 ))            /* pa2 | pb2 | pg2 | pr2 */
227533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
/* Broadcast the alpha word (the highest of the four words) of MP1/MP2 into
 * all four words of MA1/MA2.  MP1/MP2 are only read.
 */
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
    MOVQ       ( MP1, MA1 )             /* pa1 | pb1 | pg1 | pr1 */                     ;\
TWO(MOVQ       ( MP2, MA2 ))            /* pa2 | pb2 | pg2 | pr2 */                     ;\
                                                                                        ;\
    PUNPCKHWD  ( MA1, MA1 )             /* pa1 | pa1 | pb1 | pb1 */                     ;\
TWO(PUNPCKHWD  ( MA2, MA2 ))            /* pa2 | pa2 | pb2 | pb2 */                     ;\
    PUNPCKHDQ  ( MA1, MA1 )             /* pa1 | pa1 | pa1 | pa1 */                     ;\
TWO(PUNPCKHDQ  ( MA2, MA2 ))            /* pa2 | pa2 | pa2 | pa2 */
23655d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca
/* Pack the 16-bit words of MS1 (low half of the result) and MS2 (high half)
 * back into bytes with unsigned saturation, leaving the packed result in MS1.
 *
 * The last line of the body must not end in a line continuation: otherwise
 * the macro would extend onto whatever line follows it.
 */
#define GMB_PACK( MS1, MS2 ) \
    PACKUSWB   ( MS2, MS1 )             /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;

/* Write the packed result in MSS back to the memory at rgba: 32 bits in the
 * ONE() variant, 64 bits in the TWO() variant.
 */
#define GMB_STORE(rgba, MSS ) \
ONE(MOVD       ( MSS, REGIND(rgba) ))   /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MSS, REGIND(rgba) ))   /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
243d60bb2fbc8b61e9748ce9c235acd4e870a2df613Jose Fonseca
/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
 * Replace data segment constants with text-segment
 * constants (via pushl/movq).
 *
 * The data-segment definitions that used to live here are kept below for
 * reference only:
 *
    SEG_DATA

ALIGNDATA8
const_0080:
    D_LONG 0x00800080, 0x00800080

const_80:
    D_LONG 0x80808080, 0x80808080
*/

/* Each 64-bit constant is split into its low (_l) and high (_h) 32-bit
 * halves so that it can be pushed on the stack as two immediates.
 */
#define const_0080_l 0x00800080
#define const_0080_h 0x00800080
#define const_80_l 0x80808080
#define const_80_h 0x80808080

    SEG_TEXT
262d60bb2fbc8b61e9748ce9c235acd4e870a2df613Jose Fonseca
2639add9a21d8c51ee4238169265541fa9a40f0a8b0Brian Paul
/* Blend transparency function
 *
 * Instantiates the "mmx_blendtmp.h" template with the _transparency suffix.
 * Per iteration: load rgba into MM1 and dest into MM2, widen them to words,
 * broadcast the alpha of rgba, interpolate with GMB_LERP_GSC, then pack the
 * result and store it back to rgba.
 */

#define TAG(x) CONCAT(x,_transparency)
#define LLTAG(x) LLBL2(x,_transparency)

/* MM0 is the all-zero register GMB_UNPACK interleaves with. */
#define INIT \
    PXOR       ( MM0, MM0 )             /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                    /* MM1 = rgba, MM2 = dest */    ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )               /* bytes -> words */            ;\
    GMB_ALPHA( MM1, MM3, MM4, MM6 )                     /* MM3/MM6 = alpha of rgba */   ;\
    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )        /* results in MM3/MM6 */        ;\
    GMB_PACK( MM3, MM6 )                                /* words -> bytes, in MM3 */    ;\
    GMB_STORE( rgba, MM3 )

#include "mmx_blendtmp.h"
282533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
283533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
/* Blend add function
 *
 * Instantiates the "mmx_blendtmp.h" template with the _add suffix.  The
 * packed bytes of rgba and dest are added with unsigned saturation and the
 * sum is written back to rgba.  No setup is needed, so INIT is empty.
 *
 * FIXME: Add some loop unrolling here...
 */

#define TAG(x) CONCAT(x,_add)
#define LLTAG(x) LLBL2(x,_add)

#define INIT

#define MAIN( rgba, dest ) \
ONE(MOVD       ( REGIND(rgba), MM1 ))   /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MM2 ))   /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
ONE(PADDUSB    ( MM2, MM1 ))            /* saturating byte add: MM1 += MM2 */           ;\
ONE(MOVD       ( MM1, REGIND(rgba) ))   /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
                                                                                        ;\
TWO(MOVQ       ( REGIND(rgba), MM1 ))   /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(PADDUSB    ( REGIND(dest), MM1 ))   /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MM1, REGIND(rgba) ))

#include "mmx_blendtmp.h"
3053fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
3063fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
/* Blend min function
 *
 * Instantiates the "mmx_blendtmp.h" template with the _min suffix.  Writes
 * the byte-wise unsigned minimum of rgba (p) and dest (q) back to rgba.
 * PCMPGTB compares signed bytes, so both operands first have their top bit
 * flipped with the 0x80 mask held in MM7.
 */

#define TAG(x) CONCAT(x,_min)
#define LLTAG(x) LLBL2(x,_min)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions.
 * The previous definition, kept for reference:
#define INIT \
    MOVQ       ( CONTENT(const_80), MM7 )
 */
/* Build the 64-bit constant on the stack, load it into MM7 and then release
 * the stack space again.
 */
#define INIT \
    PUSH_L     ( CONST(const_80_h) )    /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
    PUSH_L     ( CONST(const_80_l) )                                                    ;\
    MOVQ       ( REGIND(ESP), MM7 )     /* MM7 = 0x8080808080808080 */                  ;\
    ADD_L      ( CONST(8), ESP)

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )    /* MM1 = p (rgba), MM2 = q (dest) */            ;\
    MOVQ       ( MM1, MM3 )             /* working copy of p */                         ;\
    MOVQ       ( MM2, MM4 )             /* working copy of q */                         ;\
    PXOR       ( MM7, MM3 )             /* p: unsigned -> signed */                     ;\
    PXOR       ( MM7, MM4 )             /* q: unsigned -> signed */                     ;\
    PCMPGTB    ( MM3, MM4 )             /* mask = q > p ? 0xff : 0x00 */                ;\
    PAND       ( MM4, MM1 )             /* q > p ? p : 0 */                             ;\
    PANDN      ( MM2, MM4 )             /* q > p ? 0 : q */                             ;\
    POR        ( MM1, MM4 )             /* q > p ? p : q */                             ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"
3373fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
3383fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
3393fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca/* Blend max function
3403fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca */
3413fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
/* Naming hooks for the max variant: TAG(x) expands to CONCAT(x,_max) and
 * LLTAG(x) expands to LLBL2(x,_max).  CONCAT and LLBL2 are defined outside
 * this section.
 * NOTE(review): presumably the mmx_blendtmp.h template included below uses
 * these to name its entry point and local labels -- confirm in the template.
 */
342b305028464f02947c0cce0476af0e35f4ed1fafaBrian Paul#define TAG(x) CONCAT(x,_max)
343b305028464f02947c0cce0476af0e35f4ed1fafaBrian Paul#define LLTAG(x) LLBL2(x,_max)
3443fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
3457d39c1ae76cc7dc6793980fd83db100399ee9179Brian/* Kevin F. Quinn 2nd July 2006
3467d39c1ae76cc7dc6793980fd83db100399ee9179Brian * Replace data segment constants with text-segment instructions
3473fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca#define INIT \
3487d39c1ae76cc7dc6793980fd83db100399ee9179Brian    MOVQ       ( CONTENT(const_80), MM7 )
3497d39c1ae76cc7dc6793980fd83db100399ee9179Brian */
/* INIT (max): build the 64-bit constant const_80_h:const_80_l in MM7 using
 * only immediates and a temporary 8-byte stack slot.
 *
 * The high dword must be pushed first: each PUSH_L moves ESP down, so the
 * value pushed last sits at (ESP) and becomes the low half of the quadword
 * that MOVQ reads.  Pushing _l before _h would put the halves in the wrong
 * positions; that only goes unnoticed while both halves hold the same value.
 * ADD_L releases the slot so ESP is the same after INIT as before it.
 * MAIN XORs its operands with MM7.
 */
3507d39c1ae76cc7dc6793980fd83db100399ee9179Brian#define INIT \
3517d39c1ae76cc7dc6793980fd83db100399ee9179Brian    PUSH_L     ( CONST(const_80_h) ) 		/* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/	;\
3527d39c1ae76cc7dc6793980fd83db100399ee9179Brian    PUSH_L     ( CONST(const_80_l) ) 		/* low dword, now at (ESP)                       */	;\
3537d39c1ae76cc7dc6793980fd83db100399ee9179Brian    MOVQ       ( REGIND(ESP), MM7 ) 		/* MM7 = the eight bytes just pushed             */	;\
3547d39c1ae76cc7dc6793980fd83db100399ee9179Brian    ADD_L      ( CONST(8), ESP)			/* drop the temporary stack slot                 */
3553fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca
/* MAIN (max): per-byte unsigned maximum of the two values fetched by
 * GMB_LOAD; the result is handed to GMB_STORE.
 *
 * In the comments below p is the value in MM1 and q the value in MM2.
 * NOTE(review): presumably p is read from rgba and q from dest -- confirm in
 * GMB_LOAD, which is defined outside this section.
 *
 * PCMPGTB compares signed bytes, so copies of both operands first get their
 * top bit flipped with the 0x80 bytes that INIT left in MM7; the compare
 * mask then picks q where q > p and p elsewhere.  MM3 is scratch, MM4 holds
 * the mask and finally the result, and MM2 is overwritten along the way.
 */
3563fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca#define MAIN( rgba, dest ) \
3573fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    GMB_LOAD( rgba, dest, MM1, MM2 )		/*              MM1 = p, MM2 = q                 */	;\
3583fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    MOVQ       ( MM1, MM3 )			/*              MM3 = copy of p                  */	;\
3593fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    MOVQ       ( MM2, MM4 )			/*              MM4 = copy of q                  */	;\
3603fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    PXOR       ( MM7, MM3 )			/*              unsigned -> signed               */	;\
3613fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    PXOR       ( MM7, MM4 )			/*              unsigned -> signed               */	;\
3623fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    PCMPGTB    ( MM3, MM4 )			/*                 q > p ? 0xff : 0x00           */	;\
3633fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    PAND       ( MM4, MM2 )			/*                 q > p ? q : 0                 */	;\
3643fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    PANDN      ( MM1, MM4 )			/*                 q > p ? 0 : p                 */	;\
3653fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    POR        ( MM2, MM4 )			/*                 q > p ? q : p                 */	;\
3663fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca    GMB_STORE( rgba, MM4 )			/*              hand max(p, q) to GMB_STORE      */
3670c527ab0546eb1de9ee10cc31bc386a40e6b3f98Jose Fonseca
3680c527ab0546eb1de9ee10cc31bc386a40e6b3f98Jose Fonseca#include "mmx_blendtmp.h"
3690c527ab0546eb1de9ee10cc31bc386a40e6b3f98Jose Fonseca
3700c527ab0546eb1de9ee10cc31bc386a40e6b3f98Jose Fonseca
3713fe2bb8933c15a7091838fd982dbad402fe6ad43Jose Fonseca/* Blend modulate function
372533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca */
373533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
/* Naming hooks for the modulate variant: TAG(x) expands to
 * CONCAT(x,_modulate) and LLTAG(x) expands to LLBL2(x,_modulate).  CONCAT and
 * LLBL2 are defined outside this section.
 * NOTE(review): presumably the mmx_blendtmp.h template included below uses
 * these to name its entry point and local labels -- confirm in the template.
 */
374b305028464f02947c0cce0476af0e35f4ed1fafaBrian Paul#define TAG(x) CONCAT(x,_modulate)
375b305028464f02947c0cce0476af0e35f4ed1fafaBrian Paul#define LLTAG(x) LLBL2(x,_modulate)
376533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca
3777d39c1ae76cc7dc6793980fd83db100399ee9179Brian/* Kevin F. Quinn 2nd July 2006
3787d39c1ae76cc7dc6793980fd83db100399ee9179Brian * Replace data segment constants with text-segment instructions
3797d39c1ae76cc7dc6793980fd83db100399ee9179Brian#define INIT \
3807d39c1ae76cc7dc6793980fd83db100399ee9179Brian    MOVQ       ( CONTENT(const_0080), MM7 )
3817d39c1ae76cc7dc6793980fd83db100399ee9179Brian */
/* INIT (modulate): clear MM0 and build the 64-bit constant
 * const_0080_h:const_0080_l in MM7 using only immediates and a temporary
 * 8-byte stack slot.
 *
 * The high dword must be pushed first: each PUSH_L moves ESP down, so the
 * value pushed last sits at (ESP) and becomes the low half of the quadword
 * that MOVQ reads.  Pushing _l before _h would put the halves in the wrong
 * positions; that only goes unnoticed while both halves hold the same value.
 * ADD_L releases the slot so ESP is the same after INIT as before it.
 * MAIN passes MM0 to GMB_UNPACK and MM7 to GMB_MULT_GSR.
 */
382533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca#define INIT \
383533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */	;\
3847d39c1ae76cc7dc6793980fd83db100399ee9179Brian    PUSH_L     ( CONST(const_0080_h) ) 	/*   0x0080  |   0x0080  |   0x0080  |   0x0080  */	;\
3857d39c1ae76cc7dc6793980fd83db100399ee9179Brian    PUSH_L     ( CONST(const_0080_l) ) 	/* low dword, now at (ESP)                       */	;\
3867d39c1ae76cc7dc6793980fd83db100399ee9179Brian    MOVQ       ( REGIND(ESP), MM7 ) 		/* MM7 = the eight bytes just pushed             */	;\
3877d39c1ae76cc7dc6793980fd83db100399ee9179Brian    ADD_L      ( CONST(8), ESP)			/* drop the temporary stack slot                 */
3889add9a21d8c51ee4238169265541fa9a40f0a8b0Brian Paul
/* MAIN (modulate): load, unpack, multiply, pack, store.
 *
 * All five steps are GMB_* helper macros defined outside this section, so
 * only the data flow visible here is described: GMB_LOAD fills MM1 and MM2,
 * GMB_UNPACK also receives MM4, MM5 and the zero register MM0 set up by
 * INIT, GMB_MULT_GSR receives the 0x0080-per-word constant INIT left in MM7,
 * and the value finally stored is taken from MM2 after GMB_PACK( MM2, MM5 ).
 * NOTE(review): presumably UNPACK widens the bytes of MM1/MM2 to words (high
 * halves in MM4/MM5) and MULT_GSR multiplies MM1 into MM2 and MM4 into MM5
 * with rounding -- confirm against the helper definitions.
 */
389533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca#define MAIN( rgba, dest ) \
390533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    GMB_LOAD( rgba, dest, MM1, MM2 )		/* fetch the two operands into MM1 and MM2       */	;\
391533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )	/* MM0 is the all-zero register from INIT        */	;\
392533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )	/* MM7 is the 0x0080 word constant from INIT     */	;\
393533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    GMB_PACK( MM2, MM5 )			/* combine MM2 and MM5; MM2 is stored next       */	;\
394533e88824af9f60a926e7b70ddd40ad1386be686Jose Fonseca    GMB_STORE( rgba, MM2 )			/* hand the result to GMB_STORE                  */
395afb833d4e89c312460a4ab9ed6a7a8ca4ebbfe1cjtg
39655d9ee83b4c29e8f7c373ee6326bbb4f77402beeJose Fonseca#include "mmx_blendtmp.h"
3979add9a21d8c51ee4238169265541fa9a40f0a8b0Brian Paul
398462183fe4cb6df6d90632d9e2cee881c8d26b1cbAlan Hourihane#endif
399fcdc6a7d2488defd66bc7e8398c6d8c9a6190a1aKristian Høgsberg
/* On ELF/Linux builds, emit an empty .note.GNU-stack section.  The GNU
 * toolchain treats this marker as a declaration that the object does not
 * need an executable stack.  It sits outside the USE_MMX_ASM conditional, so
 * it is emitted even when the MMX code above is compiled out.
 */
400fcdc6a7d2488defd66bc7e8398c6d8c9a6190a1aKristian Høgsberg#if defined (__ELF__) && defined (__linux__)
401fcdc6a7d2488defd66bc7e8398c6d8c9a6190a1aKristian Høgsberg	.section .note.GNU-stack,"",%progbits
402fcdc6a7d2488defd66bc7e8398c6d8c9a6190a1aKristian Høgsberg#endif
403