/*
 * mmx_blend.S (revision 462183fe4cb6df6d90632d9e2cee881c8d26b1cb)
 *
 * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
 *
 * MMX building blocks (the GMB_* macros) for blending pixels made of four
 * 8-bit channels, followed by five instantiations of the "mmx_blendtmp.h"
 * template: _transparency, _add, _min, _max and _modulate.
 *
 * Conventions used by the macros and their comments:
 *  - instruction operands are written ( source, destination );
 *  - p is the pixel loaded from rgba, q is the pixel loaded from dest,
 *    s is the result that is stored back to rgba;
 *  - lines wrapped in ONE() / TWO() are the one-pixel / two-pixel variants
 *    of a step.  Both wrappers are defined outside this file
 *    (NOTE(review): presumably by "mmx_blendtmp.h" -- confirm there).
 */


#ifdef USE_MMX_ASM
#include "matypes.h"


/* integer multiplication - alpha plus one
 *
 * makes the following approximation to the division (Sree)
 *
 *   rgb*a/255 ~= (rgb*(a+1)) >> 8
 *
 * which is the fastest method that satisfies the following OpenGL criteria
 *
 *   0*0 = 0 and 255*255 = 255
 *
 * note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
 *
 *   PCMPEQW    ( MX1, MX1 )
 *
 * MP1/MP2 hold the pixel words, MA1/MA2 hold the alpha words on entry and
 * the products on exit.
 */
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
    PSUBW      ( MX1, MA1 )                 /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 (subtracting -1) */ ;\
    PMULLW     ( MP1, MA1 )                 /* t1 = p1*a1 */ ;\
                                            ;\
TWO(PSUBW      ( MX1, MA2 ))                /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = p2*a2 */ ;\
                                            ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 ~= t1/255 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 ~= t2/255 */


/* integer multiplication - geometric series
 *
 * takes the geometric series approximation to the division
 *
 *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 * in this case just the first two terms to fit in 16bit arithmetic
 *
 *   t/255 ~= (t + (t >> 8)) >> 8
 *
 * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
 * so the special case a = 255 must be accounted for or roundoff must be used
 *
 * The products end up in MA1/MA2; MP1/MP2 are overwritten with a copy of t.
 */
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
    PMULLW     ( MP1, MA1 )                 /* t1 = p1*a1 */ ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = p2*a2 */ ;\
                                            ;\
    MOVQ       ( MA1, MP1 )                 ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 */ ;\
                                            ;\
TWO(MOVQ       ( MA2, MP2 ))                ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 */ ;\
                                            ;\
    PADDW      ( MP1, MA1 )                 /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW      ( CONST(8), MA1 )            /* sa1 | sb1 | sg1 | sr1 */ ;\
                                            ;\
TWO(PADDW      ( MP2, MA2 ))                /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* sa2 | sb2 | sg2 | sr2 */


/* integer multiplication - geometric series plus rounding
 *
 * when using a geometric series division instead of truncating the result
 * use roundoff in the approximation (Jim Blinn)
 *
 *   t = rgb*a + 0x80
 *
 * achieving the exact results
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * The products end up in MA1/MA2; MP1/MP2 are overwritten with a copy of t.
 */
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
    PMULLW     ( MP1, MA1 )                 /* t1 = p1*a1 */ ;\
    PADDW      ( M80, MA1 )                 /* t1 += 0x80 */ ;\
                                            ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = p2*a2 */ ;\
TWO(PADDW      ( M80, MA2 ))                /* t2 += 0x80 */ ;\
                                            ;\
    MOVQ       ( MA1, MP1 )                 ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 */ ;\
                                            ;\
TWO(MOVQ       ( MA2, MP2 ))                ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 */ ;\
                                            ;\
    PADDW      ( MP1, MA1 )                 /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW      ( CONST(8), MA1 )            /* sa1 | sb1 | sg1 | sr1 */ ;\
                                            ;\
TWO(PADDW      ( MP2, MA2 ))                /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series
 *
 * computes s = (p - q)*pa/255 + q per channel, leaving s in MA1/MA2.
 * MP1/MP2 and MQ1/MQ2 are overwritten with intermediate values.
 */
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )                 /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )            /* q1 << 8 */ ;\
    PMULLW     ( MP1, MA1 )                 /* t1 = (p1 - q1)*pa1 */ ;\
                                            ;\
TWO(PSUBW      ( MQ2, MP2 ))                /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))           /* q2 << 8 */ ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = (p2 - q2)*pa2 */ ;\
                                            ;\
    MOVQ       ( MA1, MP1 )                 ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 */ ;\
                                            ;\
TWO(MOVQ       ( MA2, MP2 ))                ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 */ ;\
                                            ;\
    PADDW      ( MP1, MA1 )                 /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW      ( MP2, MA2 ))                /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
                                            ;\
    PADDW      ( MQ1, MA1 )                 /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW      ( MQ2, MA2 ))                /* (t2/255 + q2) << 8 */ ;\
                                            ;\
    PSRLW      ( CONST(8), MA1 )            /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series with roundoff
 *
 * this is a generalization of Blinn's formula to signed arithmetic
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * The sign bit of (p - q) is turned into 0x100 and subtracted from t before
 * the 0x80 roundoff is added, so only channels where q > p are adjusted.
 * The result s is left in MA1/MA2; MP1/MP2 and MQ1/MQ2 are overwritten.
 */
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
    PSUBW      ( MQ1, MP1 )                 /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )            /* q1 << 8 */ ;\
    PMULLW     ( MP1, MA1 )                 /* t1 = (p1 - q1)*pa1 */ ;\
                                            ;\
TWO(PSUBW      ( MQ2, MP2 ))                /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))           /* q2 << 8 */ ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = (p2 - q2)*pa2 */ ;\
                                            ;\
    PSRLW      ( CONST(15), MP1 )           /* q1 > p1 ? 1 : 0 */ ;\
TWO(PSRLW      ( CONST(15), MP2 ))          /* q2 > p2 ? 1 : 0 */ ;\
                                            ;\
    PSLLW      ( CONST(8), MP1 )            /* q1 > p1 ? 0x100 : 0 */ ;\
TWO(PSLLW      ( CONST(8), MP2 ))           /* q2 > p2 ? 0x100 : 0 */ ;\
                                            ;\
    PSUBW      ( MP1, MA1 )                 /* t1 -=? 0x100 */ ;\
TWO(PSUBW      ( MP2, MA2 ))                /* t2 -=? 0x100 */ ;\
                                            ;\
    PADDW      ( M80, MA1 )                 /* t1 += 0x80 */ ;\
TWO(PADDW      ( M80, MA2 ))                /* t2 += 0x80 */ ;\
                                            ;\
    MOVQ       ( MA1, MP1 )                 ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 */ ;\
                                            ;\
TWO(MOVQ       ( MA2, MP2 ))                ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 */ ;\
                                            ;\
    PADDW      ( MP1, MA1 )                 /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW      ( MP2, MA2 ))                /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
                                            ;\
    PADDW      ( MQ1, MA1 )                 /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW      ( MQ2, MA2 ))                /* (t2/255 + q2) << 8 */ ;\
                                            ;\
    PSRLW      ( CONST(8), MA1 )            /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series with correction
 *
 * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
 *
 *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
 *
 * note that although it is faster than rounding off, it doesn't always give the exact results
 *
 * The result s is left in MA1/MA2; MP1/MP2 and MQ1/MQ2 are overwritten.
 */
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW      ( MQ1, MP1 )                 /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW      ( CONST(8), MQ1 )            /* q1 << 8 */ ;\
    PMULLW     ( MP1, MA1 )                 /* t1 = (p1 - q1)*pa1 */ ;\
                                            ;\
TWO(PSUBW      ( MQ2, MP2 ))                /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW      ( CONST(8), MQ2 ))           /* q2 << 8 */ ;\
TWO(PMULLW     ( MP2, MA2 ))                /* t2 = (p2 - q2)*pa2 */ ;\
                                            ;\
    MOVQ       ( MA1, MP1 )                 ;\
    PSRLW      ( CONST(8), MA1 )            /* t1 >> 8 */ ;\
                                            ;\
TWO(MOVQ       ( MA2, MP2 ))                ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* t2 >> 8 */ ;\
                                            ;\
    PADDW      ( MA1, MP1 )                 /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW      ( CONST(7), MA1 )            /* t1 >> 15 (already shifted by 8 above) */ ;\
                                            ;\
TWO(PADDW      ( MA2, MP2 ))                /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW      ( CONST(7), MA2 ))           /* t2 >> 15 (already shifted by 8 above) */ ;\
                                            ;\
    PADDW      ( MP1, MA1 )                 /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\
TWO(PADDW      ( MP2, MA2 ))                /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\
                                            ;\
    PADDW      ( MQ1, MA1 )                 /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW      ( MQ2, MA2 ))                /* (t2/255 + q2) << 8 */ ;\
                                            ;\
    PSRLW      ( CONST(8), MA1 )            /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW      ( CONST(8), MA2 ))           /* sa2 | sb2 | sg2 | sr2 */


/* common blending setup code
 *
 * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making
 *
 *   PXOR      ( M00, M00 )
 */

/* Load the rgba pixel(s) into MPP and the dest pixel(s) into MQQ:
 * 32 bits each in the ONE() variant, 64 bits each in the TWO() variant.
 */
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
ONE(MOVD       ( REGIND(rgba), MPP ))       /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MQQ ))       /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
                                            ;\
TWO(MOVQ       ( REGIND(rgba), MPP ))       /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(MOVQ       ( REGIND(dest), MQQ ))       /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */

/* Widen the bytes of each pixel to 16-bit words by interleaving with M00:
 * the low halves stay in MP1/MQ1, the high halves (TWO() only) go to MP2/MQ2.
 */
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
TWO(MOVQ       ( MP1, MP2 ))                ;\
TWO(MOVQ       ( MQ1, MQ2 ))                ;\
                                            ;\
    PUNPCKLBW  ( M00, MQ1 )                 /*    qa1    |    qb1    |    qg1    |    qr1    */ ;\
TWO(PUNPCKHBW  ( M00, MQ2 ))                /*    qa2    |    qb2    |    qg2    |    qr2    */ ;\
    PUNPCKLBW  ( M00, MP1 )                 /*    pa1    |    pb1    |    pg1    |    pr1    */ ;\
TWO(PUNPCKHBW  ( M00, MP2 ))                /*    pa2    |    pb2    |    pg2    |    pr2    */

/* Copy the unpacked pixel MP1 (MP2) into MA1 (MA2) and replicate its top
 * word, the alpha channel, into all four words.
 */
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
    MOVQ       ( MP1, MA1 )                 ;\
TWO(MOVQ       ( MP2, MA2 ))                ;\
                                            ;\
    PUNPCKHWD  ( MA1, MA1 )                 /*    pa1    |    pa1    |           |           */ ;\
TWO(PUNPCKHWD  ( MA2, MA2 ))                /*    pa2    |    pa2    |           |           */ ;\
    PUNPCKHDQ  ( MA1, MA1 )                 /*    pa1    |    pa1    |    pa1    |    pa1    */ ;\
TWO(PUNPCKHDQ  ( MA2, MA2 ))                /*    pa2    |    pa2    |    pa2    |    pa2    */

/* Pack the words of MS1 (low half) and MS2 (high half) back to bytes with
 * unsigned saturation; the packed result is left in MS1.  This step is not
 * wrapped in ONE()/TWO(), so it is emitted in both variants.
 */
#define GMB_PACK( MS1, MS2 ) \
    PACKUSWB   ( MS2, MS1 )                 /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */

/* Store the result MSS to rgba: 32 bits in the ONE() variant, 64 bits in
 * the TWO() variant.
 */
#define GMB_STORE(rgba, MSS ) \
ONE(MOVD       ( MSS, REGIND(rgba) ))       /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MSS, REGIND(rgba) ))       /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */


    SEG_DATA

ALIGNDATA8
/* 0x0080 in each 16-bit word: the roundoff constant loaded by the _modulate INIT */
const_0080:
    D_LONG 0x00800080, 0x00800080

/* 0x80 in each byte: the unsigned -> signed mask loaded by the _min and _max INIT */
const_80:
    D_LONG 0x80808080, 0x80808080

    SEG_TEXT


/* Each section below defines TAG, LLTAG, INIT and MAIN and then includes
 * "mmx_blendtmp.h", which expands them.
 * NOTE(review): the macros are redefined per section without an #undef in
 * this file -- presumably the template undefines them; confirm there.
 */


/* Blend transparency function
 *
 * s = (p - q)*pa/255 + q, using the alpha of the rgba pixel
 */

#define TAG(x) CONCAT(x,_transparency)
#define LLTAG(x) LLBL2(x,_transparency)

#define INIT \
    PXOR       ( MM0, MM0 )                 /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                    ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )               ;\
    GMB_ALPHA( MM1, MM3, MM4, MM6 )                     ;\
    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )        ;\
    GMB_PACK( MM3, MM6 )                                ;\
    GMB_STORE( rgba, MM3 )

#include "mmx_blendtmp.h"


/* Blend add function
 *
 * s = p + q per byte, with unsigned saturation
 *
 * FIXME: Add some loop unrolling here...
 */

#define TAG(x) CONCAT(x,_add)
#define LLTAG(x) LLBL2(x,_add)

#define INIT

#define MAIN( rgba, dest ) \
ONE(MOVD       ( REGIND(rgba), MM1 ))       /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD       ( REGIND(dest), MM2 ))       /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
ONE(PADDUSB    ( MM2, MM1 ))                ;\
ONE(MOVD       ( MM1, REGIND(rgba) ))       /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
                                            ;\
TWO(MOVQ       ( REGIND(rgba), MM1 ))       /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(PADDUSB    ( REGIND(dest), MM1 ))       /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ       ( MM1, REGIND(rgba) ))

#include "mmx_blendtmp.h"


/* Blend min function
 *
 * s = min(p, q) per byte.  The compare is a signed one, so both operands
 * are first XORed with 0x80 to map unsigned order onto signed order.
 */

#define TAG(x) CONCAT(x,_min)
#define LLTAG(x) LLBL2(x,_min)

#define INIT \
    MOVQ       ( CONTENT(const_80), MM7 )   /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )        ;\
    MOVQ       ( MM1, MM3 )                 ;\
    MOVQ       ( MM2, MM4 )                 ;\
    PXOR       ( MM7, MM3 )                 /* unsigned -> signed */ ;\
    PXOR       ( MM7, MM4 )                 /* unsigned -> signed */ ;\
    PCMPGTB    ( MM3, MM4 )                 /* q > p ? 0xff : 0x00 */ ;\
    PAND       ( MM4, MM1 )                 /* q > p ? p : 0 */ ;\
    PANDN      ( MM2, MM4 )                 /* q > p ? 0 : q */ ;\
    POR        ( MM1, MM4 )                 /* q > p ? p : q */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend max function
 *
 * s = max(p, q) per byte, using the same 0x80 bias as the min function.
 */

#define TAG(x) CONCAT(x,_max)
#define LLTAG(x) LLBL2(x,_max)

#define INIT \
    MOVQ       ( CONTENT(const_80), MM7 )   /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )        ;\
    MOVQ       ( MM1, MM3 )                 ;\
    MOVQ       ( MM2, MM4 )                 ;\
    PXOR       ( MM7, MM3 )                 /* unsigned -> signed */ ;\
    PXOR       ( MM7, MM4 )                 /* unsigned -> signed */ ;\
    PCMPGTB    ( MM3, MM4 )                 /* q > p ? 0xff : 0x00 */ ;\
    PAND       ( MM4, MM2 )                 /* q > p ? q : 0 */ ;\
    PANDN      ( MM1, MM4 )                 /* q > p ? 0 : p */ ;\
    POR        ( MM2, MM4 )                 /* q > p ? q : p */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend modulate function
 *
 * s = p*q/255 per channel, using the rounded geometric series product
 */

#define TAG(x) CONCAT(x,_modulate)
#define LLTAG(x) LLBL2(x,_modulate)

#define INIT \
    PXOR       ( MM0, MM0 )                 /*   0x0000  |   0x0000  |   0x0000  |   0x0000  */ ;\
    MOVQ       ( CONTENT(const_0080), MM7 ) /*   0x0080  |   0x0080  |   0x0080  |   0x0080  */

#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                    ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )               ;\
    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )             ;\
    GMB_PACK( MM2, MM5 )                                ;\
    GMB_STORE( rgba, MM2 )

#include "mmx_blendtmp.h"

#endif