;
/*
 * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
 */


#ifdef USE_MMX_ASM
#include "assyntax.h"
#include "matypes.h"

/*
 * Conventions used by the GMB_* macros in this file
 *
 *   - every instruction macro takes its operands as ( source, destination ),
 *     as the per-line comments below show (e.g. PSUBW ( MX1, MA1 ) updates MA1)
 *   - "p" names the pixel loaded from rgba, "q" the pixel loaded from dest,
 *     "a" a per-channel multiplier, "t" an intermediate product and "s" the
 *     final result; the suffix 1/2 is the first/second pixel
 *   - lane pictures such as "sa1 | sb1 | sg1 | sr1" list 16-bit lanes from the
 *     most significant to the least significant one
 *   - ONE() and TWO() are not defined in this file; presumably the including
 *     template (mmx_blendtmp.h) defines them to emit their argument only in
 *     the one-pixel or the two-pixel code path -- TODO confirm there
 */

/* integer multiplication - alpha plus one
 *
 * makes the following approximation to the division (Sree)
 *
 *   rgb*a/255 ~= (rgb*(a+1)) >> 8
 *
 * which is the fastest method that satisfies the following OpenGL criteria
 *
 *   0*0 = 0 and 255*255 = 255
 *
 * note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
 *
 *   PCMPEQW    ( MX1, MX1 )
 *
 * MP1/MP2 hold the unpacked channels, MA1/MA2 the multipliers; the results
 * are left in MA1/MA2.
 */
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
    PSUBW     ( MX1, MA1 )          /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 (subtracting -1) */ ;\
    PMULLW    ( MP1, MA1 )          /* t1 = p1*(a1 + 1) */ ;\
                                    ;\
TWO(PSUBW     ( MX1, MA2 ))         /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = p2*(a2 + 1) */ ;\
                                    ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 ~= p1*a1/255 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 ~= p2*a2/255 */


/* integer multiplication - geometric series
 *
 * takes the geometric series approximation to the division
 *
 *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
 *
 * in this case just the first two terms to fit in 16bit arithmetic
 *
 *   t/255 ~= (t + (t >> 8)) >> 8
 *
 * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
 * so the special case a = 255 must be accounted for or roundoff must be used
 *
 * The results are left in MA1/MA2; MP1/MP2 are overwritten with a copy of
 * the raw product.
 */
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
    PMULLW    ( MP1, MA1 )          /* t1 = p1*a1 */ ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = p2*a2 */ ;\
                                    ;\
    MOVQ      ( MA1, MP1 )          /* keep a copy of t1 (clobbers p1) */ ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 */ ;\
                                    ;\
TWO(MOVQ      ( MA2, MP2 ))         /* keep a copy of t2 (clobbers p2) */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 */ ;\
                                    ;\
    PADDW     ( MP1, MA1 )          /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW     ( CONST(8), MA1 )     /* sa1 | sb1 | sg1 | sr1 */ ;\
                                    ;\
TWO(PADDW     ( MP2, MA2 ))         /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* sa2 | sb2 | sg2 | sr2 */


/* integer multiplication - geometric series plus rounding
 *
 * when using a geometric series division instead of truncating the result
 * use roundoff in the approximation (Jim Blinn)
 *
 *   t = rgb*a + 0x80
 *
 * achieving the exact results
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * The results are left in MA1/MA2; MP1/MP2 are overwritten with a copy of
 * the biased product.
 */
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
    PMULLW    ( MP1, MA1 )          /* t1 = p1*a1 */ ;\
    PADDW     ( M80, MA1 )          /* t1 += 0x80 */ ;\
                                    ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = p2*a2 */ ;\
TWO(PADDW     ( M80, MA2 ))         /* t2 += 0x80 */ ;\
                                    ;\
    MOVQ      ( MA1, MP1 )          /* keep a copy of t1 (clobbers p1) */ ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 */ ;\
                                    ;\
TWO(MOVQ      ( MA2, MP2 ))         /* keep a copy of t2 (clobbers p2) */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 */ ;\
                                    ;\
    PADDW     ( MP1, MA1 )          /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW     ( CONST(8), MA1 )     /* sa1 | sb1 | sg1 | sr1 */ ;\
                                    ;\
TWO(PADDW     ( MP2, MA2 ))         /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series
 *
 * computes  s ~= q + (p - q)*a/255  per channel, with the division by 255
 * approximated by the two-term geometric series.  q is pre-shifted left by 8
 * so that it can be added before the final shift.
 *
 * The results are left in MA1/MA2; MP1/MP2 and MQ1/MQ2 are clobbered.
 */
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW     ( MQ1, MP1 )          /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW     ( CONST(8), MQ1 )     /* q1 << 8 */ ;\
    PMULLW    ( MP1, MA1 )          /* t1 = (p1 - q1)*pa1 */ ;\
                                    ;\
TWO(PSUBW     ( MQ2, MP2 ))         /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW     ( CONST(8), MQ2 ))    /* q2 << 8 */ ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = (p2 - q2)*pa2 */ ;\
                                    ;\
    MOVQ      ( MA1, MP1 )          /* keep a copy of t1 */ ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 */ ;\
                                    ;\
TWO(MOVQ      ( MA2, MP2 ))         /* keep a copy of t2 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 */ ;\
                                    ;\
    PADDW     ( MP1, MA1 )          /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW     ( MP2, MA2 ))         /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
                                    ;\
    PADDW     ( MQ1, MA1 )          /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW     ( MQ2, MA2 ))         /* (t2/255 + q2) << 8 */ ;\
                                    ;\
    PSRLW     ( CONST(8), MA1 )     /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series with roundoff
 *
 * this is a generalization of Blinn's formula to signed arithmetic
 *
 * note that M80 is a register with the 0x0080008000800080 constant
 *
 * The sign bit of (p - q) is extracted and turned into a 0x100 correction
 * that is subtracted from the product wherever the difference is negative.
 *
 * The results are left in MA1/MA2; MP1/MP2 and MQ1/MQ2 are clobbered.
 */
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
    PSUBW     ( MQ1, MP1 )          /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW     ( CONST(8), MQ1 )     /* q1 << 8 */ ;\
    PMULLW    ( MP1, MA1 )          /* t1 = (p1 - q1)*pa1 */ ;\
                                    ;\
TWO(PSUBW     ( MQ2, MP2 ))         /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW     ( CONST(8), MQ2 ))    /* q2 << 8 */ ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = (p2 - q2)*pa2 */ ;\
                                    ;\
    PSRLW     ( CONST(15), MP1 )    /* q1 > p1 ? 1 : 0 (sign bit of p1 - q1) */ ;\
TWO(PSRLW     ( CONST(15), MP2 ))   /* q2 > p2 ? 1 : 0 (sign bit of p2 - q2) */ ;\
                                    ;\
    PSLLW     ( CONST(8), MP1 )     /* q1 > p1 ? 0x100 : 0 */ ;\
TWO(PSLLW     ( CONST(8), MP2 ))    /* q2 > p2 ? 0x100 : 0 */ ;\
                                    ;\
    PSUBW     ( MP1, MA1 )          /* t1 -=? 0x100 (only where q1 > p1) */ ;\
TWO(PSUBW     ( MP2, MA2 ))         /* t2 -=? 0x100 (only where q2 > p2) */ ;\
                                    ;\
    PADDW     ( M80, MA1 )          /* t1 += 0x80 */ ;\
TWO(PADDW     ( M80, MA2 ))         /* t2 += 0x80 */ ;\
                                    ;\
    MOVQ      ( MA1, MP1 )          /* keep a copy of t1 */ ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 */ ;\
                                    ;\
TWO(MOVQ      ( MA2, MP2 ))         /* keep a copy of t2 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 */ ;\
                                    ;\
    PADDW     ( MP1, MA1 )          /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW     ( MP2, MA2 ))         /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
                                    ;\
    PADDW     ( MQ1, MA1 )          /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW     ( MQ2, MA2 ))         /* (t2/255 + q2) << 8 */ ;\
                                    ;\
    PSRLW     ( CONST(8), MA1 )     /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* sa2 | sb2 | sg2 | sr2 */


/* linear interpolation - geometric series with correction
 *
 * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
 *
 *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
 *
 * note that although it is faster than rounding off, it doesn't always give the exact results
 *
 * The results are left in MA1/MA2; MP1/MP2 and MQ1/MQ2 are clobbered.
 */
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
    PSUBW     ( MQ1, MP1 )          /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
    PSLLW     ( CONST(8), MQ1 )     /* q1 << 8 */ ;\
    PMULLW    ( MP1, MA1 )          /* t1 = (p1 - q1)*pa1 */ ;\
                                    ;\
TWO(PSUBW     ( MQ2, MP2 ))         /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW     ( CONST(8), MQ2 ))    /* q2 << 8 */ ;\
TWO(PMULLW    ( MP2, MA2 ))         /* t2 = (p2 - q2)*pa2 */ ;\
                                    ;\
    MOVQ      ( MA1, MP1 )          /* keep a copy of t1 */ ;\
    PSRLW     ( CONST(8), MA1 )     /* t1 >> 8 */ ;\
                                    ;\
TWO(MOVQ      ( MA2, MP2 ))         /* keep a copy of t2 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* t2 >> 8 */ ;\
                                    ;\
    PADDW     ( MA1, MP1 )          /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
    PSRLW     ( CONST(7), MA1 )     /* t1 >> 15 (7 more bits on top of the 8 above) */ ;\
                                    ;\
TWO(PADDW     ( MA2, MP2 ))         /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW     ( CONST(7), MA2 ))    /* t2 >> 15 */ ;\
                                    ;\
    PADDW     ( MP1, MA1 )          /* t1 + (t1 >> 8) + (t1 >> 15) ~= (t1/255) << 8 */ ;\
TWO(PADDW     ( MP2, MA2 ))         /* t2 + (t2 >> 8) + (t2 >> 15) ~= (t2/255) << 8 */ ;\
                                    ;\
    PADDW     ( MQ1, MA1 )          /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW     ( MQ2, MA2 ))         /* (t2/255 + q2) << 8 */ ;\
                                    ;\
    PSRLW     ( CONST(8), MA1 )     /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW     ( CONST(8), MA2 ))    /* sa2 | sb2 | sg2 | sr2 */


/* common blending setup code
 *
 * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making
 *
 *   PXOR      ( M00, M00 )
 */

/* Load the packed pixel(s) addressed by rgba into MPP and the one(s)
 * addressed by dest into MQQ (32 bits in the ONE path, 64 bits in the TWO path).
 */
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
ONE(MOVD      ( REGIND(rgba), MPP ))    /*     |     |     |     | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(MOVD      ( REGIND(dest), MQQ ))    /*     |     |     |     | qa1 | qb1 | qg1 | qr1 */ ;\
                                        ;\
TWO(MOVQ      ( REGIND(rgba), MPP ))    /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
TWO(MOVQ      ( REGIND(dest), MQQ ))    /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */

/* Widen the packed bytes to 16-bit lanes by interleaving with the zero
 * register M00: the low pixel stays in MP1/MQ1, the high pixel (TWO path
 * only) goes to MP2/MQ2.
 */
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
TWO(MOVQ      ( MP1, MP2 ))         ;\
TWO(MOVQ      ( MQ1, MQ2 ))         ;\
                                    ;\
    PUNPCKLBW ( M00, MQ1 )          /* qa1 | qb1 | qg1 | qr1 */ ;\
TWO(PUNPCKHBW ( M00, MQ2 ))         /* qa2 | qb2 | qg2 | qr2 */ ;\
    PUNPCKLBW ( M00, MP1 )          /* pa1 | pb1 | pg1 | pr1 */ ;\
TWO(PUNPCKHBW ( M00, MP2 ))         /* pa2 | pb2 | pg2 | pr2 */

/* Copy MP1/MP2 to MA1/MA2 and replicate the most significant 16-bit lane
 * (the alpha channel) into all four lanes.
 */
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
    MOVQ      ( MP1, MA1 )          ;\
TWO(MOVQ      ( MP2, MA2 ))         ;\
                                    ;\
    PUNPCKHWD ( MA1, MA1 )          /* pa1 | pa1 |     |     */ ;\
TWO(PUNPCKHWD ( MA2, MA2 ))         /* pa2 | pa2 |     |     */ ;\
    PUNPCKHDQ ( MA1, MA1 )          /* pa1 | pa1 | pa1 | pa1 */ ;\
TWO(PUNPCKHDQ ( MA2, MA2 ))         /* pa2 | pa2 | pa2 | pa2 */

/* Narrow the 16-bit lanes of MS1 (low half) and MS2 (high half) back to
 * bytes with unsigned saturation; the packed result is left in MS1.
 */
#define GMB_PACK( MS1, MS2 ) \
    PACKUSWB  ( MS2, MS1 )          /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */

/* Store the packed result MSS to the memory addressed by rgba
 * (32 bits in the ONE path, 64 bits in the TWO path).
 */
#define GMB_STORE(rgba, MSS ) \
ONE(MOVD      ( MSS, REGIND(rgba) ))    /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ      ( MSS, REGIND(rgba) ))    /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */

/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
 * Replace data segment constants with text-segment
 * constants (via pushl/movq)
    SEG_DATA

ALIGNDATA8
const_0080:
    D_LONG 0x00800080, 0x00800080

const_80:
    D_LONG 0x80808080, 0x80808080
*/
/* Low (_l) and high (_h) 32-bit halves of the 64-bit constants that the
 * INIT macros build on the stack. */
#define const_0080_l 0x00800080
#define const_0080_h 0x00800080
#define const_80_l 0x80808080
/* High 32-bit half of the 64-bit 0x80-per-byte constant. */
#define const_80_h 0x80808080

    SEG_TEXT


/* Blend transparency function
 *
 * TAG, LLTAG, INIT and MAIN parameterize the code pulled in by the
 * #include of mmx_blendtmp.h that follows them.  That header is not visible
 * here; presumably it expands them into the complete routine and #undefs
 * them, since they are redefined for every variant -- TODO confirm.
 */

#define TAG(x) CONCAT(x,_transparency)
#define LLTAG(x) LLBL2(x,_transparency)

/* MM0 is the all-zero register passed to GMB_UNPACK in MAIN. */
#define INIT \
    PXOR      ( MM0, MM0 )          /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */

/* Load rgba (MM1/MM4) and dest (MM2/MM5), replicate the rgba alpha into
 * MM3/MM6, interpolate, then pack and store the result back to rgba.
 */
#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                    ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )               ;\
    GMB_ALPHA( MM1, MM3, MM4, MM6 )                     ;\
    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )        ;\
    GMB_PACK( MM3, MM6 )                                ;\
    GMB_STORE( rgba, MM3 )

#include "mmx_blendtmp.h"


/* Blend add function
 *
 * Per-byte unsigned saturating add of rgba and dest, stored back to rgba.
 *
 * FIXME: Add some loop unrolling here...
 */

#define TAG(x) CONCAT(x,_add)
#define LLTAG(x) LLBL2(x,_add)

/* No constants are needed for this variant. */
#define INIT

#define MAIN( rgba, dest ) \
ONE(MOVD      ( REGIND(rgba), MM1 ))    /*     |     |     |     | pa1 | pb1 | pg1 | pr1   (rgba) */ ;\
ONE(MOVD      ( REGIND(dest), MM2 ))    /*     |     |     |     | qa1 | qb1 | qg1 | qr1   (dest) */ ;\
ONE(PADDUSB   ( MM2, MM1 ))             /* saturating p + q */ ;\
ONE(MOVD      ( MM1, REGIND(rgba) ))    /*     |     |     |     | sa1 | sb1 | sg1 | sr1 */ ;\
                                        ;\
TWO(MOVQ      ( REGIND(rgba), MM1 ))    /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1   (rgba) */ ;\
TWO(PADDUSB   ( REGIND(dest), MM1 ))    /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ      ( MM1, REGIND(rgba) ))

#include "mmx_blendtmp.h"


/* Blend min function
 */

#define TAG(x) CONCAT(x,_min)
#define LLTAG(x) LLBL2(x,_min)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions.
 * The previous definition was:
 *     INIT = MOVQ ( CONTENT(const_80), MM7 )
 */
/* Build the 64-bit constant on the stack, load it into MM7 and release the
 * stack space again.  The high dword is pushed first so that the low dword
 * ends up at the lower address, i.e. at (ESP).
 */
#define INIT \
    PUSH_L    ( CONST(const_80_h) )     /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
    PUSH_L    ( CONST(const_80_l) )     ;\
    MOVQ      ( REGIND(ESP), MM7 )      ;\
    ADD_L     ( CONST(8), ESP)

/* Per-byte unsigned minimum of rgba (p, MM1) and dest (q, MM2), stored back
 * to rgba.  PCMPGTB compares signed bytes, so both operands are first biased
 * by 0x80 (XOR with MM7) to obtain an unsigned ordering.
 */
#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )    ;\
    MOVQ      ( MM1, MM3 )              ;\
    MOVQ      ( MM2, MM4 )              ;\
    PXOR      ( MM7, MM3 )              /* unsigned -> signed */ ;\
    PXOR      ( MM7, MM4 )              /* unsigned -> signed */ ;\
    PCMPGTB   ( MM3, MM4 )              /* q > p ? 0xff : 0x00 */ ;\
    PAND      ( MM4, MM1 )              /* q > p ? p : 0 */ ;\
    PANDN     ( MM2, MM4 )              /* q > p ? 0 : q */ ;\
    POR       ( MM1, MM4 )              /* q > p ? p : q */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend max function
 */

#define TAG(x) CONCAT(x,_max)
#define LLTAG(x) LLBL2(x,_max)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions.
 * The previous definition was:
 *     INIT = MOVQ ( CONTENT(const_80), MM7 )
 */
/* Same stack-built 0x80-per-byte constant as the min variant; the high dword
 * is pushed first so that the low dword lands at (ESP).
 */
#define INIT \
    PUSH_L    ( CONST(const_80_h) )     /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
    PUSH_L    ( CONST(const_80_l) )     ;\
    MOVQ      ( REGIND(ESP), MM7 )      ;\
    ADD_L     ( CONST(8), ESP)

/* Per-byte unsigned maximum of rgba (p, MM1) and dest (q, MM2), stored back
 * to rgba.  Uses the same 0x80 bias as the min variant to compare unsigned
 * bytes with the signed PCMPGTB.
 */
#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )    ;\
    MOVQ      ( MM1, MM3 )              ;\
    MOVQ      ( MM2, MM4 )              ;\
    PXOR      ( MM7, MM3 )              /* unsigned -> signed */ ;\
    PXOR      ( MM7, MM4 )              /* unsigned -> signed */ ;\
    PCMPGTB   ( MM3, MM4 )              /* q > p ? 0xff : 0x00 */ ;\
    PAND      ( MM4, MM2 )              /* q > p ? q : 0 */ ;\
    PANDN     ( MM1, MM4 )              /* q > p ? 0 : p */ ;\
    POR       ( MM2, MM4 )              /* q > p ? q : p */ ;\
    GMB_STORE( rgba, MM4 )

#include "mmx_blendtmp.h"


/* Blend modulate function
 */

#define TAG(x) CONCAT(x,_modulate)
#define LLTAG(x) LLBL2(x,_modulate)

/* Kevin F. Quinn 2nd July 2006
 * Replace data segment constants with text-segment instructions.
 * The previous definition was:
 *     INIT = MOVQ ( CONTENT(const_0080), MM7 )
 */
/* MM0 is the all-zero register used for unpacking; MM7 receives the
 * 0x0080-per-word rounding constant, built on the stack with the high dword
 * pushed first so that the low dword lands at (ESP).
 */
#define INIT \
    PXOR      ( MM0, MM0 )              /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\
    PUSH_L    ( CONST(const_0080_h) )   /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\
    PUSH_L    ( CONST(const_0080_l) )   ;\
    MOVQ      ( REGIND(ESP), MM7 )      ;\
    ADD_L     ( CONST(8), ESP)

/* Per-channel product of rgba and dest divided by 255 with rounding
 * (GMB_MULT_GSR), packed and stored back to rgba.
 */
#define MAIN( rgba, dest ) \
    GMB_LOAD( rgba, dest, MM1, MM2 )                ;\
    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )           ;\
    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )         ;\
    GMB_PACK( MM2, MM5 )                            ;\
    GMB_STORE( rgba, MM2 )

#include "mmx_blendtmp.h"

#endif

#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif