;
; pII-optimised MMX format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; COPYRIGHT NOTICE
;
; This file partly contains code that is (c) Intel Corporation, specifically
; the mode detection routine, and the converter to 15 bit (8 pixel
; conversion routine from the mmx programming tutorial pages).
;
;
; These routines aren't exactly pII optimised - it's just that as they
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
; optimise them for p5 MMXs..
;
; Register usage shared by every converter in this file (as read from the
; code below):
;   esi  = pointer to the 32-bit source pixels (4 bytes per pixel)
;   edi  = pointer to the destination pixels
;   ecx  = number of pixels to convert
; esi and edi are advanced past the converted data.  eax, ecx, edx and the
; MMX registers are clobbered; the 24-bit and 565 routines also clobber ebx.
; No routine executes emms.

BITS 32

%include "common.inc"

SDL_FUNC _ConvertMMXpII32_24RGB888
SDL_FUNC _ConvertMMXpII32_16RGB565
SDL_FUNC _ConvertMMXpII32_16BGR565
SDL_FUNC _ConvertMMXpII32_16RGB555
SDL_FUNC _ConvertMMXpII32_16BGR555

;; Macros for conversion routines

; Push a 32-bit immediate twice so that [esp] holds it replicated into
; both halves of a 64-bit value.
%macro _push_immq_mask 1
	push dword %1
	push dword %1
%endmacro

; load_immq mmreg, imm32 : mmreg = imm32 replicated in both dwords.
; Leaves 8 bytes on the stack; release them with CLEANUP_IMMQ_LOADS.
%macro load_immq 2
	_push_immq_mask %2
	movq %1, [esp]
%endmacro

; pand_immq mmreg, imm32 : mmreg &= imm32 replicated in both dwords.
; Leaves 8 bytes on the stack; release them with CLEANUP_IMMQ_LOADS.
%macro pand_immq 2
	_push_immq_mask %2
	pand %1, [esp]
%endmacro

; Release the stack space of `num` preceding load_immq/pand_immq uses.
; The argument is parenthesised so that an expression argument scales
; correctly.
%define CLEANUP_IMMQ_LOADS(num) \
	add esp, byte 8 * (num)

%define mmx32_rgb888_mask 00ffffffh
%define mmx32_rgb565_b 000000f8h
%define mmx32_rgb565_g 0000fc00h
%define mmx32_rgb565_r 00f80000h

%define mmx32_rgb555_rb 00f800f8h
%define mmx32_rgb555_g 0000f800h
%define mmx32_rgb555_mul 20000008h
%define mmx32_bgr555_mul 00082000h

SECTION .text

; ---------------------------------------------------------------------------
; 32-bit xRGB -> packed 24-bit RGB.  Four pixels (16 source bytes) become
; 12 destination bytes per MMX iteration; the remaining 0-3 pixels are
; copied bytewise.
; Byte diagrams in the comments list a 64-bit register from its most
; significant byte to its least significant byte; upper-case letters belong
; to the pixel in the high dword, lower-case to the pixel in the low dword.
; ---------------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:

	; set up mm6 as the mask, mm7 as zero
	load_immq mm6, mmx32_rgb888_mask
	CLEANUP_IMMQ_LOADS(1)
	pxor mm7, mm7

	mov edx, ecx			; save ecx
	and ecx, 0fffffffch		; clear lower two bits
	jnz .L1
	jmp .L2

.L1:

	movq mm0, [esi]			; A R G B a r g b   (pixels 1,0)
	pand mm0, mm6			; 0 R G B 0 r g b
	movq mm1, [esi+8]		; A R G B a r g b   (pixels 3,2)
	pand mm1, mm6			; 0 R G B 0 r g b

	movq mm2, mm0			; 0 R G B 0 r g b
	punpckhdq mm2, mm7		; 0 0 0 0 0 R G B
	punpckldq mm0, mm7		; 0 0 0 0 0 r g b
	psllq mm2, 24			; 0 0 R G B 0 0 0
	por mm0, mm2			; 0 0 R G B r g b

	movq mm3, mm1			; 0 R G B 0 r g b
	psllq mm3, 48			; g b 0 0 0 0 0 0   (low two bytes of pixel 2)
	por mm0, mm3			; g b R G B r g b

	movq mm4, mm1			; 0 R G B 0 r g b
	punpckhdq mm4, mm7		; 0 0 0 0 0 R G B
	punpckldq mm1, mm7		; 0 0 0 0 0 r g b
	psrlq mm1, 16			; 0 0 0 0 0 0 0 r   (red of pixel 2)
	psllq mm4, 8			; 0 0 0 0 R G B 0
	por mm1, mm4			; 0 0 0 0 R G B r

	movq [edi], mm0			; first 8 output bytes
	add esi, BYTE 16
	movd [edi+8], mm1		; last 4 output bytes
	add edi, BYTE 12
	sub ecx, BYTE 4
	jnz .L1

.L2:
	mov ecx, edx
	and ecx, BYTE 3			; 0-3 pixels left over
	jz .L4
.L3:
	; copy the three colour bytes, drop the fourth source byte
	; (dl may be reused here: the remaining count already lives in ecx)
	mov al, [esi]
	mov bl, [esi+1]
	mov dl, [esi+2]
	mov [edi], al
	mov [edi+1], bl
	mov [edi+2], dl
	add esi, BYTE 4
	add edi, BYTE 3
	dec ecx
	jnz .L3
.L4:
	retn



; ---------------------------------------------------------------------------
; 32-bit xRGB -> 16-bit RGB565 (red in bits 11-15, green 5-10, blue 0-4).
; Four pixels per MMX iteration, remaining 0-3 pixels done with integer code.
; ---------------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:

	; set up masks
	load_immq mm5, mmx32_rgb565_b
	load_immq mm6, mmx32_rgb565_g
	load_immq mm7, mmx32_rgb565_r
	CLEANUP_IMMQ_LOADS(3)

	mov edx, ecx
	shr ecx, 2			; number of 4-pixel groups
	jnz .L1
	jmp .L2	; not necessary at the moment, but doesn't hurt (much)

.L1:
	movq mm0, [esi]			; argb
	movq mm1, mm0			; argb
	pand mm0, mm6			; 00g0
	movq mm3, mm1			; argb
	pand mm1, mm5			; 000b
	pand mm3, mm7			; 0r00
	pslld mm1, 2			; 0 0 000000bb bbb00000
	por mm0, mm1			; 0 0 ggggggbb bbb00000
	psrld mm0, 5			; 0 0 00000ggg gggbbbbb

	movq mm4, [esi+8]		; argb
	movq mm2, mm4			; argb
	pand mm4, mm6			; 00g0
	movq mm1, mm2			; argb
	pand mm2, mm5			; 000b
	pand mm1, mm7			; 0r00
	pslld mm2, 2			; 0 0 000000bb bbb00000
	por mm4, mm2			; 0 0 ggggggbb bbb00000
	psrld mm4, 5			; 0 0 00000ggg gggbbbbb

	; the masked red values are 00f8h in the high word of each dword;
	; packing words to bytes turns each into a 16-bit rrrrr000 00000000
	packuswb mm3, mm1		; R 0 r 0
	packssdw mm0, mm4		; as above.. ish
	por mm0, mm3			; done.
	movq [edi], mm0

	add esi, BYTE 16
	add edi, BYTE 8
	dec ecx
	jnz .L1

.L2:
	mov ecx, edx
	and ecx, BYTE 3			; 0-3 pixels left over
	jz .L4
.L3:
	mov al, [esi]			; blue
	mov bh, [esi+1]			; green, placed in bits 8-15 of ebx
	mov ah, [esi+2]			; red,   placed in bits 8-15 of eax
	shr al, 3			; blue -> bits 0-4
	and eax, 0F81Fh			; keep rrrrr000 000bbbbb
	shr ebx, 5			; green -> bits 3-10
	and ebx, 07E0h			; keep its top six bits in bits 5-10
	add eax, ebx
	mov [edi], al
	mov [edi+1], ah
	add esi, BYTE 4
	add edi, BYTE 2
	dec ecx
	jnz .L3

.L4:
	retn


; ---------------------------------------------------------------------------
; 32-bit xRGB -> 16-bit BGR565 (blue in bits 11-15, green 5-10, red 0-4).
; Same scheme as the RGB565 routine with the red and blue roles exchanged.
; ---------------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:

	load_immq mm5, mmx32_rgb565_r
	load_immq mm6, mmx32_rgb565_g
	load_immq mm7, mmx32_rgb565_b
	CLEANUP_IMMQ_LOADS(3)

	mov edx, ecx
	shr ecx, 2			; number of 4-pixel groups
	jnz .L1
	jmp .L2

.L1:
	movq mm0, [esi]			; a r g b
	movq mm1, mm0			; a r g b
	pand mm0, mm6			; 0 0 g 0
	movq mm3, mm1			; a r g b
	pand mm1, mm5			; 0 r 0 0
	pand mm3, mm7			; 0 0 0 b

	psllq mm3, 16			; 0 b 0 0
	psrld mm1, 14			; 0 0 000000rr rrr00000
	por mm0, mm1			; 0 0 ggggggrr rrr00000
	psrld mm0, 5			; 0 0 00000ggg gggrrrrr

	movq mm4, [esi+8]		; a r g b
	movq mm2, mm4			; a r g b
	pand mm4, mm6			; 0 0 g 0
	movq mm1, mm2			; a r g b
	pand mm2, mm5			; 0 r 0 0
	pand mm1, mm7			; 0 0 0 b

	psllq mm1, 16			; 0 b 0 0
	psrld mm2, 14			; 0 0 000000rr rrr00000
	por mm4, mm2			; 0 0 ggggggrr rrr00000
	psrld mm4, 5			; 0 0 00000ggg gggrrrrr

	packuswb mm3, mm1		; BBBBB000 00000000  bbbbb000 00000000
	packssdw mm0, mm4		; 00000GGG GGGRRRRR  00000GGG GGGRRRRR
	por mm0, mm3			; BBBBBGGG GGGRRRRR  bbbbbggg gggrrrrr
	movq [edi], mm0

	add esi, BYTE 16
	add edi, BYTE 8
	dec ecx
	jnz .L1

.L2:
	and edx, BYTE 3			; 0-3 pixels left over
	jz .L4
.L3:
	mov al, [esi+2]			; red
	mov bh, [esi+1]			; green, placed in bits 8-15 of ebx
	mov ah, [esi]			; blue,  placed in bits 8-15 of eax
	shr al, 3			; red -> bits 0-4
	and eax, 0F81Fh			; keep bbbbb000 000rrrrr
	shr ebx, 5			; green -> bits 3-10
	and ebx, 07E0h			; keep its top six bits in bits 5-10
	add eax, ebx
	mov [edi], al
	mov [edi+1], ah
	add esi, BYTE 4
	add edi, BYTE 2
	dec edx
	jnz .L3

.L4:
	retn

; ---------------------------------------------------------------------------
; 32-bit xRGB -> 15-bit BGR555 (blue in bits 10-14, green 5-9, red 0-4).
; ---------------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:

	; the 16BGR555 converter is identical to the RGB555 one,
	; except it uses a different multiplier for the pmaddwd
	; instruction.  cool huh.

	load_immq mm7, mmx32_bgr555_mul
	jmp _convert_bgr555_cheat

; ---------------------------------------------------------------------------
; 32-bit xRGB -> 15-bit RGB555 (red in bits 10-14, green 5-9, blue 0-4).
;
; The arithmetic is that of the Intel version: red and blue are masked to
; their top five bits and moved into place with a single pmaddwd
; (mm7 = multiplier pair), green is OR-ed in and the result is shifted
; right by 6.  'mmx32_rgb555_add' was renamed to 'mmx32_rgb555_mul', which
; is a more accurate name.
;
; Unlike the Intel loop, this one is not software-pipelined.  The pipelined
; form fetched the next iteration's 16 source bytes at the bottom of every
; pass and therefore read beyond the pixels it was asked to convert on the
; final pass.  Keeping the red/blue mask in mm5 also avoids rebuilding it on
; the stack four times per iteration.
;
; The 0-7 trailing pixels go through the same pmaddwd path, one pixel at a
; time, so they honour the multiplier in mm7 and come out in the right
; component order for both the RGB555 and the BGR555 entry point.
; ---------------------------------------------------------------------------
_ConvertMMXpII32_16RGB555:

	load_immq mm7, mmx32_rgb555_mul
_convert_bgr555_cheat:
	load_immq mm6, mmx32_rgb555_g
	load_immq mm5, mmx32_rgb555_rb
	CLEANUP_IMMQ_LOADS(3)		; mm7 (either entry point), mm6, mm5

	mov edx, ecx			; Save ecx

	and ecx, DWORD 0fffffff8h	; clear lower three bits
	jnz .L1
	jmp near .L2

.L1:
	; ---- pixels 0-3 of the group -> [edi] ----
	movq mm0, [esi]			; pixels 1,0
	movq mm2, [esi+8]		; pixels 3,2
	movq mm1, mm0
	movq mm3, mm2
	pand mm1, mm5			; 0 r 0 b (top five bits of each)
	pand mm3, mm5
	pmaddwd mm1, mm7		; red and blue moved to their slots (<< 6)
	pmaddwd mm3, mm7
	pand mm0, mm6			; 0 0 g 0 (top five bits)
	pand mm2, mm6
	por mm0, mm1
	por mm2, mm3
	psrld mm0, 6			; 15-bit pixel in the low word of each dword
	psrld mm2, 6
	packssdw mm0, mm2		; values are < 8000h, so no saturation occurs
	movq [edi], mm0

	; ---- pixels 4-7 of the group -> [edi+8] ----
	movq mm0, [esi+16]		; pixels 5,4
	movq mm2, [esi+24]		; pixels 7,6
	movq mm1, mm0
	movq mm3, mm2
	pand mm1, mm5
	pand mm3, mm5
	pmaddwd mm1, mm7
	pmaddwd mm3, mm7
	pand mm0, mm6
	pand mm2, mm6
	por mm0, mm1
	por mm2, mm3
	psrld mm0, 6
	psrld mm2, 6
	packssdw mm0, mm2
	movq [edi+8], mm0

	add esi, BYTE 32
	add edi, BYTE 16

	sub ecx, BYTE 8
	jz .L2
	jmp .L1


.L2:
	mov ecx, edx

	and ecx, BYTE 7			; 0-7 pixels left over
	jz .L4

.L3:
	movd mm0, [esi]			; one source pixel, upper dword zeroed
	add esi, BYTE 4

	movq mm1, mm0
	pand mm0, mm6			; green
	pand mm1, mm5			; red and blue
	pmaddwd mm1, mm7		; same multiplier as the main loop
	por mm0, mm1
	psrld mm0, 6

	movd eax, mm0
	mov [edi], ax
	add edi, BYTE 2

	dec ecx
	jnz .L3

.L4:
	retn

%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif