;
/*
 * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
 */


#ifdef USE_MMX_ASM
#include "assyntax.h"
#include "matypes.h"

/* integer multiplication - alpha plus one
 *
 * makes the following approximation to the division (Sree)
 *
 *   rgb*a/255 ~= (rgb*(a+1)) >> 8
 *
 * which is the fastest method that satisfies the following OpenGL criteria
 *
 *   0*0 = 0 and 255*255 = 255
 *
 * note that MX1 is a register holding the constant 0xffffffffffffffff, which
 * can easily be obtained with
 *
 *   PCMPEQW    ( MX1, MX1 )
 */
/* GMB_MULT_AP1 operands:
 *
 *   MP1, MP2  pixel components (one 16-bit word per component), read only
 *   MA1, MA2  alpha words on entry; overwritten with the result
 *   MX1       all-ones register; subtracting it (-1 per word) adds 1 to alpha
 *
 * NOTE(review): the ONE(...)/TWO(...) wrappers used throughout are defined
 * outside this file section (presumably by mmx_blendtmp.h) and appear to
 * select instructions for the one-pixel and two-pixel variants - confirm.
 */
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
    PSUBW      ( MX1, MA1 )         /*   a1 + 1  |   a1 + 1  |   a1 + 1  |   a1 + 1  */ ;\
    PMULLW     ( MP1, MA1 )         /*                 t1 = p1*a1                    */ ;\
                                                                                        ;\
TWO(PSUBW      ( MX1, MA2 ))        /*   a2 + 1  |   a2 + 1  |   a2 + 1  |   a2 + 1  */ ;\
TWO(PMULLW     ( MP2, MA2 ))        /*                 t2 = p2*a2                    */ ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )    /*              t1 >> 8 ~= t1/255                */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*              t2 >> 8 ~= t2/255                */


/* integer multiplication - geometric series
 *
 * takes the geometric series approximation to the division
 *
 *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 * in this case just the first two terms to fit in 16bit arithmetic
 *
 *   t/255 ~= (t + (t >> 8)) >> 8
 *
 * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
 * so the special case a = 255 must be accounted for or roundoff must be used
 *
 * operands: MP1/MP2 hold the pixel words on entry and are overwritten (they
 * are reused as scratch for a copy of t); MA1/MA2 hold the alpha words on
 * entry and the result on exit.
 */
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
    PMULLW     ( MP1, MA1 )         /*                 t1 = p1*a1                    */ ;\
TWO(PMULLW     ( MP2, MA2 ))        /*                 t2 = p2*a2                    */ ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )         /*             keep a copy of t1                 */ ;\
    PSRLW      ( CONST(8), MA1 )    /*                   t1 >> 8                     */ ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))        /*             keep a copy of t2                 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*                   t2 >> 8                     */ ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )         /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ ;\
    PSRLW      ( CONST(8), MA1 )    /*    sa1    |    sb1    |    sg1    |    sr1    */ ;\
                                                                                        ;\
TWO(PADDW      ( MP2, MA2 ))        /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*    sa2    |    sb2    |    sg2    |    sr2    */


/* integer multiplication - geometric series plus rounding
 *
 * when using a geometric series division instead of truncating the result
 * use roundoff in the approximation (Jim Blinn)
 *
 *   t = rgb*a + 0x80
 *
 * achieving the exact results
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * operands: as for GMB_MULT_GS (MP1/MP2 are overwritten, result in MA1/MA2);
 * M80 is only read.
 */
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
    PMULLW     ( MP1, MA1 )         /*                 t1 = p1*a1                    */ ;\
    PADDW      ( M80, MA1 )         /*                 t1 += 0x80                    */ ;\
                                                                                        ;\
TWO(PMULLW     ( MP2, MA2 ))        /*                 t2 = p2*a2                    */ ;\
TWO(PADDW      ( M80, MA2 ))        /*                 t2 += 0x80                    */ ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )         /*             keep a copy of t1                 */ ;\
    PSRLW      ( CONST(8), MA1 )    /*                   t1 >> 8                     */ ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))        /*             keep a copy of t2                 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*                   t2 >> 8                     */ ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )         /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ ;\
    PSRLW      ( CONST(8), MA1 )    /*    sa1    |    sb1    |    sg1    |    sr1    */ ;\
                                                                                        ;\
TWO(PADDW      ( MP2, MA2 ))        /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*    sa2    |    sb2    |    sg2    |    sr2    */


/* linear interpolation - geometric series
 *
 * computes q + (p - q)*a/255 per component, using the two-term geometric
 * series for the division by 255.
 *
 * operands: MP1/MP2 (p) and MQ1/MQ2 (q) are both overwritten - p becomes the
 * difference and then scratch, q is shifted left by 8 in place; MA1/MA2 hold
 * the alpha words on entry and the result on exit.
 */
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )         /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )    /*                   q1 << 8                     */ ;\
    PMULLW     ( MP1, MA1 )         /*              t1 = (p1 - q1)*pa1               */ ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))        /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))   /*                   q2 << 8                     */ ;\
TWO(PMULLW     ( MP2, MA2 ))        /*              t2 = (p2 - q2)*pa2               */ ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )         /*             keep a copy of t1                 */ ;\
    PSRLW      ( CONST(8), MA1 )    /*                   t1 >> 8                     */ ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))        /*             keep a copy of t2                 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*                   t2 >> 8                     */ ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )         /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ ;\
TWO(PADDW      ( MP2, MA2 ))        /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )         /*              (t1/255 + q1) << 8               */ ;\
TWO(PADDW      ( MQ2, MA2 ))        /*              (t2/255 + q2) << 8               */ ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )    /*    sa1    |    sb1    |    sg1    |    sr1    */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*    sa2    |    sb2    |    sg2    |    sr2    */


/* linear interpolation - geometric series with roundoff
 *
 * this is a generalization of Blinn's formula to signed arithmetic
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * operands: MP1/MP2 (p) and MQ1/MQ2 (q) are overwritten; MA1/MA2 hold the
 * alpha words on entry and the result on exit; M80 is only read.
 *
 * the sign bit of (p - q) is extracted and scaled to 0x100 so that the
 * correction is subtracted from t only where the difference is negative.
 */
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
    PSUBW      ( MQ1, MP1 )         /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )    /*                   q1 << 8                     */ ;\
    PMULLW     ( MP1, MA1 )         /*              t1 = (p1 - q1)*pa1               */ ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))        /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))   /*                   q2 << 8                     */ ;\
TWO(PMULLW     ( MP2, MA2 ))        /*              t2 = (p2 - q2)*pa2               */ ;\
                                                                                        ;\
    PSRLW      ( CONST(15), MP1 )   /*                q1 > p1 ? 1 : 0                */ ;\
TWO(PSRLW      ( CONST(15), MP2 ))  /*                q2 > p2 ? 1 : 0                */ ;\
                                                                                        ;\
    PSLLW      ( CONST(8), MP1 )    /*             q1 > p1 ? 0x100 : 0               */ ;\
TWO(PSLLW      ( CONST(8), MP2 ))   /*             q2 > p2 ? 0x100 : 0               */ ;\
                                                                                        ;\
    PSUBW      ( MP1, MA1 )         /*                  t1 -=? 0x100                 */ ;\
TWO(PSUBW      ( MP2, MA2 ))        /*                  t2 -=? 0x100                 */ ;\
                                                                                        ;\
    PADDW      ( M80, MA1 )         /*                 t1 += 0x80                    */ ;\
TWO(PADDW      ( M80, MA2 ))        /*                 t2 += 0x80                    */ ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )         /*             keep a copy of t1                 */ ;\
    PSRLW      ( CONST(8), MA1 )    /*                   t1 >> 8                     */ ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))        /*             keep a copy of t2                 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*                   t2 >> 8                     */ ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )         /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ ;\
TWO(PADDW      ( MP2, MA2 ))        /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )         /*              (t1/255 + q1) << 8               */ ;\
TWO(PADDW      ( MQ2, MA2 ))        /*              (t2/255 + q2) << 8               */ ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )    /*    sa1    |    sb1    |    sg1    |    sr1    */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*    sa2    |    sb2    |    sg2    |    sr2    */


/* linear interpolation - geometric series with correction
 *
 * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
 *
 *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
 *
 * note that although it is faster than rounding off it doesn't always give the exact results
 *
 * operands: MP1/MP2 (p) and MQ1/MQ2 (q) are overwritten; MA1/MA2 hold the
 * alpha words on entry and the result on exit.
 *
 * unlike the other lerp macros, the first sum is accumulated into MP (not MA)
 * so that MA can be shifted a further 7 bits to form the (t >> 15) term.
 */
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )         /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )    /*                   q1 << 8                     */ ;\
    PMULLW     ( MP1, MA1 )         /*              t1 = (p1 - q1)*pa1               */ ;\
                                                                                        ;\
TWO(PSUBW      ( MQ2, MP2 ))        /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))   /*                   q2 << 8                     */ ;\
TWO(PMULLW     ( MP2, MA2 ))        /*              t2 = (p2 - q2)*pa2               */ ;\
                                                                                        ;\
    MOVQ       ( MA1, MP1 )         /*             keep a copy of t1                 */ ;\
    PSRLW      ( CONST(8), MA1 )    /*                   t1 >> 8                     */ ;\
                                                                                        ;\
TWO(MOVQ       ( MA2, MP2 ))        /*             keep a copy of t2                 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*                   t2 >> 8                     */ ;\
                                                                                        ;\
    PADDW      ( MA1, MP1 )         /*        t1 + (t1 >> 8) ~= (t1/255) << 8        */ ;\
    PSRLW      ( CONST(7), MA1 )    /*                   t1 >> 15                    */ ;\
                                                                                        ;\
TWO(PADDW      ( MA2, MP2 ))        /*        t2 + (t2 >> 8) ~= (t2/255) << 8        */ ;\
TWO(PSRLW      ( CONST(7), MA2 ))   /*                   t2 >> 15                    */ ;\
                                                                                        ;\
    PADDW      ( MP1, MA1 )         /*  t1 + (t1 >> 8) + (t1 >> 15) ~= (t1/255) << 8 */ ;\
TWO(PADDW      ( MP2, MA2 ))        /*  t2 + (t2 >> 8) + (t2 >> 15) ~= (t2/255) << 8 */ ;\
                                                                                        ;\
    PADDW      ( MQ1, MA1 )         /*              (t1/255 + q1) << 8               */ ;\
TWO(PADDW      ( MQ2, MA2 ))        /*              (t2/255 + q2) << 8               */ ;\
                                                                                        ;\
    PSRLW      ( CONST(8), MA1 )    /*    sa1    |    sb1    |    sg1    |    sr1    */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))   /*    sa2    |    sb2    |    sg2    |    sr2    */


/* common blending setup code
 *
 * note that M00 is a register with the 0x0000000000000000 constant, which can
 * easily be obtained with
 *
 *   PXOR      ( M00, M00 )
 *
 * naming used below: p is the pixel read from rgba, q the pixel read from
 * dest, s the stored result.
 */

/* load one pixel (32 bits, ONE path) or two pixels (64 bits, TWO path) from
 * rgba into MPP and from dest into MQQ */
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
ONE(MOVD       ( REGIND(rgba), MPP ))   /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MQQ ))   /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
                                                                                            ;\
TWO(MOVQ       ( REGIND(rgba), MPP ))   /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(MOVQ       ( REGIND(dest), MQQ ))   /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */

/* widen packed bytes to 16-bit words by interleaving with the zero register
 * M00: the low four bytes stay in MP1/MQ1, the high four (second pixel) go to
 * MP2/MQ2 */
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
TWO(MOVQ       ( MP1, MP2 ))                                                                ;\
TWO(MOVQ       ( MQ1, MQ2 ))                                                                ;\
                                                                                            ;\
    PUNPCKLBW  ( M00, MQ1 )             /*    qa1    |    qb1    |    qg1    |    qr1    */ ;\
TWO(PUNPCKHBW  ( M00, MQ2 ))            /*    qa2    |    qb2    |    qg2    |    qr2    */ ;\
    PUNPCKLBW  ( M00, MP1 )             /*    pa1    |    pb1    |    pg1    |    pr1    */ ;\
TWO(PUNPCKHBW  ( M00, MP2 ))            /*    pa2    |    pb2    |    pg2    |    pr2    */

/* copy the unpacked pixel MP into MA and replicate its top word (alpha) into
 * all four words of MA; MP itself is left unchanged */
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
    MOVQ       ( MP1, MA1 )                                                                 ;\
TWO(MOVQ       ( MP2, MA2 ))                                                                ;\
                                                                                            ;\
    PUNPCKHWD  ( MA1, MA1 )             /*    pa1    |    pa1    |           |           */ ;\
TWO(PUNPCKHWD  ( MA2, MA2 ))            /*    pa2    |    pa2    |           |           */ ;\
    PUNPCKHDQ  ( MA1, MA1 )             /*    pa1    |    pa1    |    pa1    |    pa1    */ ;\
TWO(PUNPCKHDQ  ( MA2, MA2 ))            /*    pa2    |    pa2    |    pa2    |    pa2    */

/* pack the words of MS1 (low half) and MS2 (high half) back into bytes in MS1
 * with unsigned saturation.  The expansion still ends in ';' as before, but no
 * longer relies on a blank line following a dangling line continuation. */
#define GMB_PACK( MS1, MS2 ) \
    PACKUSWB   ( MS2, MS1 )             /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;

/* store one pixel (ONE path) or two pixels (TWO path) from MSS to rgba */
#define GMB_STORE(rgba, MSS ) \
ONE(MOVD       ( MSS, REGIND(rgba) ))   /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MSS, REGIND(rgba) ))   /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */

/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
 * Replace data segment constants with text-segment
 * constants (via pushl/movq)
    SEG_DATA

ALIGNDATA8
const_0080:
    D_LONG 0x00800080, 0x00800080

const_80:
    D_LONG 0x80808080, 0x80808080
*/
/* low (_l) and high (_h) 32-bit halves of the 64-bit constants that the INIT
 * macros below assemble on the stack */
#define const_0080_l 0x00800080
#define const_0080_h 0x00800080
#define const_80_l 0x80808080
#define const_80_h 0x80808080

    SEG_TEXT


/* Blend transparency function
 *
 * s = q + (p - q)*pa/255, using the geometric series with correction.
 *
 * NOTE(review): mmx_blendtmp.h is not visible here; it presumably expands
 * TAG/LLTAG/INIT/MAIN into the actual routine and undefines them afterwards,
 * which is what allows the redefinitions further down - confirm.
 */

#define TAG(x) CONCAT(x,_transparency)
#define LLTAG(x) LLBL2(x,_transparency)

#define INIT \
    PXOR       ( MM0, MM0 )             /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                                                        ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )                                                   ;\
    GMB_ALPHA( MM1, MM3, MM4, MM6 )                                                         ;\
    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )                                            ;\
    GMB_PACK( MM3, MM6 )                                                                    ;\
    GMB_STORE( rgba, MM3 )

#include "mmx_blendtmp.h"


/* Blend add function
 *
 * s = saturate(p + q) per byte
 *
 * FIXME: Add some loop unrolling here...
 */

#define TAG(x) CONCAT(x,_add)
#define LLTAG(x) LLBL2(x,_add)

#define INIT

#define MAIN( rgba, dest ) \
ONE(MOVD       ( REGIND(rgba), MM1 ))   /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MM2 ))   /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
ONE(PADDUSB    ( MM2, MM1 ))                                                                ;\
ONE(MOVD       ( MM1, REGIND(rgba) ))   /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
                                                                                            ;\
TWO(MOVQ       ( REGIND(rgba), MM1 ))   /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(PADDUSB    ( REGIND(dest), MM1 ))   /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MM1, REGIND(rgba) ))

#include "mmx_blendtmp.h"


/* Blend min function
 *
 * s = min(p, q) per byte.  PCMPGTB compares signed bytes, so both operands are
 * first biased by XOR with 0x80 to turn the unsigned ordering into a signed one.
 */

#define TAG(x) CONCAT(x,_min)
#define LLTAG(x) LLBL2(x,_min)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions
#define INIT \
    MOVQ       ( CONTENT(const_80), MM7 )
 */
/* build the 64-bit constant on the stack (high half pushed first so that the
 * low half ends up at the lower address), load it into MM7, then release the
 * 8 bytes again */
#define INIT \
    PUSH_L     ( CONST(const_80_h) )    /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
    PUSH_L     ( CONST(const_80_l) )                                                        ;\
    MOVQ       ( REGIND(ESP), MM7 )                                                         ;\
    ADD_L      ( CONST(8), ESP)

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                                                        ;\
    MOVQ       ( MM1, MM3 )                                                                 ;\
    MOVQ       ( MM2, MM4 )                                                                 ;\
    PXOR       ( MM7, MM3 )             /*              unsigned -> signed               */ ;\
    PXOR       ( MM7, MM4 )             /*              unsigned -> signed               */ ;\
    PCMPGTB    ( MM3, MM4 )             /*             q > p ? 0xff : 0x00               */ ;\
    PAND       ( MM4, MM1 )             /*               q > p ? p : 0                   */ ;\
    PANDN      ( MM2, MM4 )             /*               q > p ? 0 : q                   */ ;\
    POR        ( MM1, MM4 )             /*               q > p ? p : q                   */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend max function
 *
 * s = max(p, q) per byte, using the same signed-compare trick as the min
 * function.
 */

#define TAG(x) CONCAT(x,_max)
#define LLTAG(x) LLBL2(x,_max)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions
#define INIT \
    MOVQ       ( CONTENT(const_80), MM7 )
 */
/* same stack-built constant as in the min function; the high half is pushed
 * first so that the low half lands at the lower address (the two halves are
 * equal, so the loaded value is unchanged by this ordering) */
#define INIT \
    PUSH_L     ( CONST(const_80_h) )    /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
    PUSH_L     ( CONST(const_80_l) )                                                        ;\
    MOVQ       ( REGIND(ESP), MM7 )                                                         ;\
    ADD_L      ( CONST(8), ESP)

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                                                        ;\
    MOVQ       ( MM1, MM3 )                                                                 ;\
    MOVQ       ( MM2, MM4 )                                                                 ;\
    PXOR       ( MM7, MM3 )             /*              unsigned -> signed               */ ;\
    PXOR       ( MM7, MM4 )             /*              unsigned -> signed               */ ;\
    PCMPGTB    ( MM3, MM4 )             /*             q > p ? 0xff : 0x00               */ ;\
    PAND       ( MM4, MM2 )             /*               q > p ? q : 0                   */ ;\
    PANDN      ( MM1, MM4 )             /*               q > p ? 0 : p                   */ ;\
    POR        ( MM2, MM4 )             /*               q > p ? q : p                   */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend modulate function
 */

#define TAG(x) CONCAT(x,_modulate)
#define LLTAG(x) LLBL2(x,_modulate)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions
#define INIT \
    MOVQ       ( CONTENT(const_0080), MM7 )
 */
/* zero MM0 and load the 0x0080 rounding constant into MM7 via the stack; the
 * high half is pushed first so that the low half lands at the lower address
 * (the two halves are equal, so the loaded value is unchanged by this
 * ordering) */
#define INIT \
    PXOR       ( MM0, MM0 )             /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ ;\
    PUSH_L     ( CONST(const_0080_h) )  /*   0x0080  |   0x0080  |   0x0080  |   0x0080  */ ;\
    PUSH_L     ( CONST(const_0080_l) )                                                      ;\
    MOVQ       ( REGIND(ESP), MM7 )                                                         ;\
    ADD_L      ( CONST(8), ESP)
3883a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org 3893a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#define MAIN( rgba, dest ) \ 3903a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ 3913a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\ 3923a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) ;\ 3933a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org GMB_PACK( MM2, MM5 ) ;\ 3943a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org GMB_STORE( rgba, MM2 ) 3953a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org 3963a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#include "mmx_blendtmp.h" 3973a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org 3983a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#endif 3993a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org 4003a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#if defined (__ELF__) && defined (__linux__) 4013a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org .section .note.GNU-stack,"",%progbits 4023a0db227ffe90888ad760c61a63226988c974e0apatrick@chromium.org#endif 403