13a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org	;
23a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/*
33a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
43a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
53a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
63a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
73a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#ifdef USE_MMX_ASM
83a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "assyntax.h"
93a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "matypes.h"
103a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
113a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* integer multiplication - alpha plus one
123a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
133a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * makes the following approximation to the division (Sree)
143a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
153a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   rgb*a/255 ~= (rgb*(a+1)) >> 8
163a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
173a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * which is the fastest method that satisfies the following OpenGL criteria
183a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
193a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   0*0 = 0 and 255*255 = 255
203a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
213a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that MX1 is a register holding the 0xffffffffffffffff constant, which can be easily obtained with
223a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
233a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   PCMPEQW    ( MX1, MX1 )
243a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
253a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
263a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSUBW      ( MX1, MA1 )			/*   a1 + 1  |   a1 + 1  |   a1 + 1  |   a1 + 1  */	;\
273a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*               t1 = p1*(a1 + 1)                */	;\
283a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
293a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSUBW      ( MX1, MA2 ))			/*   a2 + 1  |   a2 + 1  |   a2 + 1  |   a2 + 1  */	;\
303a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*               t2 = p2*(a2 + 1)                */	;\
313a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
323a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*      t1 >> 8 ~= p1*a1/255, left in MA1        */	;\
333a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*      t2 >> 8 ~= p2*a2/255, left in MA2        */
343a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
353a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
363a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* integer multiplication - geometric series
373a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
383a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * takes the geometric series approximation to the division
393a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
403a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
413a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
423a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * in this case just the first two terms, to fit in 16-bit arithmetic
433a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
443a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   t/255 ~= (t + (t >> 8)) >> 8
453a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
463a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
473a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * so the special case a = 255 must be accounted for or rounding must be used
483a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
493a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
503a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\
513a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\
523a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
533a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MA1, MP1 )			/*       MP1 = t1 (the p1 input is overwritten)  */	;\
543a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
553a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
563a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MA2, MP2 ))			/*       MP2 = t2 (the p2 input is overwritten)  */	;\
573a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
583a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
593a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
603a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
613a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
623a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
633a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
643a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
653a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
663a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* integer multiplication - geometric series plus rounding
673a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
683a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * when using a geometric series division, instead of truncating the result
693a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * use roundoff in the approximation (Jim Blinn)
703a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
713a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   t = rgb*a + 0x80
723a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
733a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * achieving the exact results
743a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
753a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that M80 is a register with the 0x0080008000800080 constant
763a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
773a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
783a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\
793a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\
803a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
813a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\
823a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\
833a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
843a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MA1, MP1 )			/*       MP1 = t1 (the p1 input is overwritten)  */	;\
853a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
863a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
873a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MA2, MP2 ))			/*       MP2 = t2 (the p2 input is overwritten)  */	;\
883a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
893a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
903a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
913a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
923a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
933a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
943a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
953a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
963a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
973a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* linear interpolation - geometric series
983a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * computes s ~= q + (p - q)*a/255 per 16-bit word, leaving s in MA1 (and MA2); the MP and MQ registers are overwritten */
993a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
1003a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
1013a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
1023a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
1033a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1043a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
1053a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
1063a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
1073a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1083a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MA1, MP1 )			/*                   MP1 = t1                    */	;\
1093a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
1103a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1113a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MA2, MP2 ))			/*                   MP2 = t2                    */	;\
1123a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
1133a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1143a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
1153a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
1163a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1173a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
1183a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
1193a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1203a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
1213a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
1223a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
1233a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
1243a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* linear interpolation - geometric series with roundoff
1253a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
1263a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * this is a generalization of Blinn's formula to signed arithmetic:
1273a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * 0x100 is subtracted from t when p - q is negative, before the 0x80 rounding term is added
1283a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that M80 is a register with the 0x0080008000800080 constant
1293a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
1303a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
1313a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
1323a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
1333a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
1343a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1353a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
1363a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
1373a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
1383a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1393a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(15), MP1 )		/*   sign bit of p1 - q1:  q1 > p1 ? 1 : 0       */	;\
1403a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(15), MP2 ))		/*   sign bit of p2 - q2:  q2 > p2 ? 1 : 0       */	;\
1413a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1423a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSLLW      ( CONST(8), MP1 )		/*             q1 > p1 ? 0x100 : 0               */	;\
1433a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSLLW      ( CONST(8), MP2 ))		/*             q2 > p2 ? 0x100 : 0               */	;\
1443a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1453a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSUBW      ( MP1, MA1 )			/*        t1 -= 0x100 only when q1 > p1          */	;\
1463a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSUBW      ( MP2, MA2 ))			/*        t2 -= 0x100 only when q2 > p2          */	;\
1473a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org 													;\
1483a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\
1493a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\
1503a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1513a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MA1, MP1 )			/*                   MP1 = t1                    */	;\
1523a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
1533a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1543a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MA2, MP2 ))			/*                   MP2 = t2                    */	;\
1553a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
1563a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1573a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
1583a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
1593a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1603a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
1613a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
1623a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1633a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
1643a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
1653a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
1663a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
1673a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* linear interpolation - geometric series with correction
1683a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
1693a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
1703a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
1713a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
1723a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
1733a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that although it is faster than rounding off, it doesn't always give the exact results
1743a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
1753a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
1763a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
1773a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
1783a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
1793a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1803a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
1813a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
1823a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
1833a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1843a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MA1, MP1 )			/*                   MP1 = t1                    */	;\
1853a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
1863a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1873a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MA2, MP2 ))			/*                   MP2 = t2                    */	;\
1883a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
1893a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1903a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MA1, MP1 )			/*  MP1 = t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
1913a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(7), MA1 )		/*      (t1 >> 8) >> 7  =  t1 >> 15              */	;\
1923a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1933a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MA2, MP2 ))			/*  MP2 = t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
1943a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(7), MA2 ))		/*      (t2 >> 8) >> 7  =  t2 >> 15              */	;\
1953a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1963a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MP1, MA1 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */	;\
1973a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MP2, MA2 ))			/*  t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8  */	;\
1983a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
1993a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
2003a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
2013a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
2023a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
2033a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
2043a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2053a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2063a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* common blending setup code
2073a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
2083a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * note that M00 is a register holding the 0x0000000000000000 constant, which can be easily obtained with
2093a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *
2103a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org *   PXOR      ( M00, M00 )
2113a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
2123a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_LOAD(rgba, dest, MPP, MQQ) \
2133a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( REGIND(rgba), MPP ))		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */	;\
2143a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( REGIND(dest), MQQ ))		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */	;\
2153a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
2163a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( REGIND(rgba), MPP ))		/* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */	;\
2173a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( REGIND(dest), MQQ ))		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
2183a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* widen each 8-bit channel to a 16-bit word by interleaving with the zero register M00; the second pixel goes to MP2 / MQ2 */
2193a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
2203a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MP1, MP2 ))			/*  copy both packed p pixels before widening    */	;\
2213a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MQ1, MQ2 ))			/*  copy both packed q pixels before widening    */	;\
2223a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
2233a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUNPCKLBW  ( M00, MQ1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */	;\
2243a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PUNPCKHBW  ( M00, MQ2 ))                    /*    qa2    |    qb2    |    qg2    |    qr2    */	;\
2253a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUNPCKLBW  ( M00, MP1 )			/*    pa1    |    pb1    |    pg1    |    pr1    */	;\
2263a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PUNPCKHBW  ( M00, MP2 ))                    /*    pa2    |    pb2    |    pg2    |    pr2    */
2273a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* copy MP1 / MP2 and replicate the highest word (the alpha lane) into all four words of MA1 / MA2 */
2283a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
2293a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MP1, MA1 )			/*    pa1    |    pb1    |    pg1    |    pr1    */	;\
2303a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MP2, MA2 ))			/*    pa2    |    pb2    |    pg2    |    pr2    */	;\
2313a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
2323a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUNPCKHWD  ( MA1, MA1 )			/*    pa1    |    pa1    |           |           */	;\
2333a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PUNPCKHWD  ( MA2, MA2 ))			/*    pa2    |    pa2    |           |           */	;\
2343a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUNPCKHDQ  ( MA1, MA1 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */	;\
2353a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PUNPCKHDQ  ( MA2, MA2 ))                    /*    pa2    |    pa2    |    pa2    |    pa2    */
2363a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* pack the 16-bit results of MS1 and MS2 back to bytes (unsigned saturation), leaving both pixels in MS1 */
2373a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_PACK( MS1, MS2 ) \
2383a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PACKUSWB   ( MS2, MS1 )			/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
2393a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* write the packed result in MSS to the memory addressed by rgba: 32 bits for one pixel, 64 bits for two */
2403a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define GMB_STORE(rgba, MSS ) \
2413a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( MSS, REGIND(rgba) ))		/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */	;\
2423a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MSS, REGIND(rgba) ))		/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
2433a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2443a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
2453a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * Replace data segment constants with text-segment
2463a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * constants (via pushl/movq); the original data-segment definitions were:
2473a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    SEG_DATA
2483a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2493a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgALIGNDATA8
2503a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgconst_0080:
2513a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    D_LONG 0x00800080, 0x00800080
2523a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2533a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgconst_80:
2543a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    D_LONG 0x80808080, 0x80808080
2553a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org*/
2563a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define const_0080_l 0x00800080	/* low  32 bits of 0x0080008000800080 (0x0080 in every word) */
2573a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define const_0080_h 0x00800080	/* high 32 bits of 0x0080008000800080 */
2583a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define const_80_l 0x80808080	/* low  32 bits of 0x8080808080808080 (0x80 in every byte) */
2593a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define const_80_h 0x80808080	/* high 32 bits of 0x8080808080808080 */
2603a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2613a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    SEG_TEXT
2623a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2633a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2643a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Blend transparency function
2653a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * per channel: result ~= dest + (rgba - dest)*alpha(rgba)/255, written back over rgba */
2663a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2673a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define TAG(x) CONCAT(x,_transparency)	/* NOTE(review): presumably used by mmx_blendtmp.h to name the generated function - confirm */
2683a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define LLTAG(x) LLBL2(x,_transparency)	/* NOTE(review): presumably the matching local-label suffix - confirm */
2693a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2703a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
2713a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */
2723a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2733a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \
2743a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_LOAD( rgba, dest, MM1, MM2 )			/* MM1 = rgba pixel(s), MM2 = dest pixel(s)           */	;\
2753a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )		/* bytes -> words; second pixel goes to MM4 / MM5     */	;\
2763a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_ALPHA( MM1, MM3, MM4, MM6 )			/* MM3 / MM6 = alpha of the rgba pixel in every word  */	;\
2773a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )	/* MM3 / MM6 = interpolated words                     */	;\
2783a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_PACK( MM3, MM6 )				/* words -> bytes, both pixels packed into MM3        */	;\
2793a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_STORE( rgba, MM3 )				/* result overwrites the rgba buffer                  */
2803a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2813a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h"
2823a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2833a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2843a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Blend add function
2853a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * per byte: result = rgba + dest with unsigned saturation (PADDUSB), written back over rgba
2863a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * FIXME: Add some loop unrolling here...
2873a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
2883a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2893a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define TAG(x) CONCAT(x,_add)	/* NOTE(review): presumably used by mmx_blendtmp.h to name the generated function - confirm */
2903a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define LLTAG(x) LLBL2(x,_add)	/* NOTE(review): presumably the matching local-label suffix - confirm */
2913a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2923a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT	/* empty: MAIN below uses no constant registers */
2933a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
2943a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \
2953a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( REGIND(rgba), MM1 ))		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */	;\
2963a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( REGIND(dest), MM2 ))		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */	;\
2973a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(PADDUSB    ( MM2, MM1 ))			/*       MM1 += MM2, saturating each byte        */	;\
2983a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgONE(MOVD       ( MM1, REGIND(rgba) ))		/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */	;\
2993a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org													;\
3003a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( REGIND(rgba), MM1 ))		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */	;\
3013a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(PADDUSB    ( REGIND(dest), MM1 ))		/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */	;\
3023a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.orgTWO(MOVQ       ( MM1, REGIND(rgba) ))		/*    store both blended pixels back to rgba     */
3033a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3043a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h"
3053a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3063a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3073a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Blend min function
 *
 * Per byte channel: result = unsigned min of the two operands, stored via
 * GMB_STORE( rgba, ... ).
 *
 * MMX only offers a signed byte compare (PCMPGTB), so both operands are
 * first XORed with 0x80 in every byte; that maps unsigned order onto signed
 * order.  The compare mask then selects, byte by byte, the smaller of the
 * two original (un-XORed) values.
 *
 * In the lane comments below, p is the value loaded into MM1 and q the value
 * loaded into MM2.
 * NOTE(review): GMB_LOAD and GMB_STORE are defined earlier in the file, not
 * in this chunk; presumably they load rgba/dest into the two registers and
 * write the result back to rgba -- confirm.
3083a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3093a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Suffix applied to the names built by the template included below.
 * NOTE(review): CONCAT and LLBL2 are defined outside this chunk -- confirm
 * how TAG/LLTAG are consumed.
 */
3103a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define TAG(x) CONCAT(x,_min)
3113a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define LLTAG(x) LLBL2(x,_min)
3123a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3133a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Kevin F. Quinn 2nd July 2006
3143a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * Replace data segment constants with text-segment instructions
 *
 * The previous data-segment form is kept here for reference only:
 *
3153a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3163a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( CONTENT(const_80), MM7 )
 *
 * The replacement below builds the 64-bit constant on the stack instead:
 * the high 32-bit half is pushed first so that the low half ends up at the
 * lower address, MOVQ reads the quadword at ESP into MM7, and ADD_L pops the
 * 8 bytes again.
3173a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3183a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3193a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_80_h) ) 		/* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/	;\
3203a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_80_l) ) 									;\
3213a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( REGIND(ESP), MM7 ) 									;\
3223a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    ADD_L      ( CONST(8), ESP)
3233a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* MAIN( rgba, dest ):
 *   MM3/MM4 = sign-biased copies of MM1/MM2, used only for the compare;
 *   MM4     = per-byte mask, 0xff where q > p;
 *   MM1     = p where the mask is set, MM4 = q where it is clear;
 *   MM4     = MM1 | MM4, i.e. the per-byte minimum, which is stored.
 * MM7 must hold the 0x80 bias set up by INIT.
 */
3243a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \
3253a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
3263a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MM1, MM3 )										;\
3273a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MM2, MM4 )										;\
3283a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM7, MM3 )			/*              unsigned -> signed               */	;\
3293a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM7, MM4 )			/*              unsigned -> signed               */	;\
3303a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PCMPGTB    ( MM3, MM4 )			/*                 q > p ? 0xff : 0x00           */	;\
3313a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PAND       ( MM4, MM1 )			/*                 q > p ? p : 0                 */	;\
3323a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PANDN      ( MM2, MM4 )			/*                 q > p ? 0 : q                 */	;\
3333a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    POR        ( MM1, MM4 )			/*                 q > p ? p : q                 */	;\
3343a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_STORE( rgba, MM4 )
3353a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Instantiate the blend template with the TAG/LLTAG/INIT/MAIN definitions
 * above.  NOTE(review): the next section re-#defines the same macros, so the
 * header presumably #undefs them after use -- confirm.
 */
3363a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h"
3373a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3383a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3393a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Blend max function
 *
 * Per byte channel: result = unsigned max of the two operands, stored via
 * GMB_STORE( rgba, ... ).
 *
 * MMX only offers a signed byte compare (PCMPGTB), so both operands are
 * first XORed with 0x80 in every byte; that maps unsigned order onto signed
 * order.  The compare mask then selects, byte by byte, the larger of the
 * two original (un-XORed) values.
 *
 * In the lane comments below, p is the value loaded into MM1 and q the value
 * loaded into MM2.
 * NOTE(review): GMB_LOAD and GMB_STORE are defined earlier in the file, not
 * in this chunk; presumably they load rgba/dest into the two registers and
 * write the result back to rgba -- confirm.
3403a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3413a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Suffix applied to the names built by the template included below.
 * NOTE(review): CONCAT and LLBL2 are defined outside this chunk -- confirm
 * how TAG/LLTAG are consumed.
 */
3423a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define TAG(x) CONCAT(x,_max)
3433a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define LLTAG(x) LLBL2(x,_max)
3443a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3453a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Kevin F. Quinn 2nd July 2006
3463a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * Replace data segment constants with text-segment instructions
 *
 * The previous data-segment form is kept here for reference only:
 *
3473a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3483a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( CONTENT(const_80), MM7 )
 *
 * The replacement below builds the 64-bit constant on the stack instead.
 * The high 32-bit half must be pushed first: the stack grows downwards, so
 * the half pushed last sits at ESP and becomes the low dword of the quadword
 * that MOVQ loads into MM7.  ADD_L then pops the 8 bytes again.
3493a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3503a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3513a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_80_h) ) 		/* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/	;\
3523a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_80_l) ) 									;\
3533a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( REGIND(ESP), MM7 ) 									;\
3543a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    ADD_L      ( CONST(8), ESP)
3553a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* MAIN( rgba, dest ):
 *   MM3/MM4 = sign-biased copies of MM1/MM2, used only for the compare;
 *   MM4     = per-byte mask, 0xff where q > p;
 *   MM2     = q where the mask is set, MM4 = p where it is clear;
 *   MM4     = MM2 | MM4, i.e. the per-byte maximum, which is stored.
 * MM7 must hold the 0x80 bias set up by INIT.
 */
3563a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \
3573a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
3583a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MM1, MM3 )										;\
3593a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( MM2, MM4 )										;\
3603a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM7, MM3 )			/*              unsigned -> signed               */	;\
3613a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM7, MM4 )			/*              unsigned -> signed               */	;\
3623a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PCMPGTB    ( MM3, MM4 )			/*                 q > p ? 0xff : 0x00           */	;\
3633a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PAND       ( MM4, MM2 )			/*                 q > p ? q : 0                 */	;\
3643a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PANDN      ( MM1, MM4 )			/*                 q > p ? 0 : p                 */	;\
3653a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    POR        ( MM2, MM4 )			/*                 q > p ? q : p                 */	;\
3663a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_STORE( rgba, MM4 )
3673a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Instantiate the blend template with the TAG/LLTAG/INIT/MAIN definitions
 * above.  NOTE(review): the next section re-#defines the same macros, so the
 * header presumably #undefs them after use -- confirm.
 */
3683a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h"
3693a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3703a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3713a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Blend modulate function
 *
 * Multiplies the two operands channel by channel: the bytes are widened
 * with GMB_UNPACK, multiplied with GMB_MULT_GSR, narrowed again with
 * GMB_PACK and stored via GMB_STORE( rgba, ... ).
 * NOTE(review): the GMB_* helpers are defined earlier in the file, not in
 * this chunk; the exact scaling/rounding of GMB_MULT_GSR should be checked
 * against its definition.
3723a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3733a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Suffix applied to the names built by the template included below.
 * NOTE(review): CONCAT and LLBL2 are defined outside this chunk -- confirm
 * how TAG/LLTAG are consumed.
 */
3743a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define TAG(x) CONCAT(x,_modulate)
3753a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define LLTAG(x) LLBL2(x,_modulate)
3763a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3773a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org/* Kevin F. Quinn 2nd July 2006
3783a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org * Replace data segment constants with text-segment instructions
 *
 * The previous data-segment form is kept here for reference only:
 *
3793a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3803a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( CONTENT(const_0080), MM7 )
 *
 * The replacement below clears MM0 (handed to GMB_UNPACK as the zero
 * register) and builds the 64-bit 0x0080-per-word constant on the stack.
 * The high 32-bit half must be pushed first: the stack grows downwards, so
 * the half pushed last sits at ESP and becomes the low dword of the quadword
 * that MOVQ loads into MM7.  ADD_L then pops the 8 bytes again.
3813a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org */
3823a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define INIT \
3833a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */	;\
3843a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_0080_h) ) 	/*   0x0080  |   0x0080  |   0x0080  |   0x0080  */	;\
3853a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    PUSH_L     ( CONST(const_0080_l) ) 								;\
3863a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    MOVQ       ( REGIND(ESP), MM7 ) 									;\
3873a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    ADD_L      ( CONST(8), ESP)
3883a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* MAIN( rgba, dest ): load both operands into MM1/MM2, widen them using
 * MM4/MM5 as extra registers and MM0 as zero, multiply with MM7 as the
 * constant operand, then pack MM2/MM5 and store MM2.
 * MM0 and MM7 must hold the values set up by INIT.
 */
3893a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \
3903a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
3913a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )								;\
3923a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )								;\
3933a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_PACK( MM2, MM5 )										;\
3943a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org    GMB_STORE( rgba, MM2 )
3953a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* Instantiate the blend template with the TAG/LLTAG/INIT/MAIN definitions
 * above.
 */
3963a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h"
3973a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
3983a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#endif
3993a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org
/* On Linux/ELF, emit an empty .note.GNU-stack section.  Its presence (with
 * no executable flag) tells the GNU linker that this object does not need an
 * executable stack; hand-written assembly has to say so explicitly.
 */
4003a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#if defined (__ELF__) && defined (__linux__)
4013a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org	.section .note.GNU-stack,"",%progbits
4023a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#endif
403