19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;
29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; pII-optimised MMX format converters for HERMES
39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; This source code is licensed under the GNU LGPL
69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; 
79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; Please refer to the file COPYING.LIB contained in the distribution for
89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; licensing conditions		
99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;
109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; COPYRIGHT NOTICE
119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; 
129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; This file partly contains code that is (c) Intel Corporation, specifically
139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; the mode detection routine, and the converter to 15 bit (8 pixel
149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; conversion routine from the mmx programming tutorial pages).
159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;
169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;
179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; These routines aren't exactly pII optimised - it's just that as they
189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall; optimise them for p5 MMXs..
209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallBITS 32
229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall%include "common.inc"
249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	
259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_FUNC _ConvertMMXpII32_24RGB888
269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_FUNC _ConvertMMXpII32_16RGB565
279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_FUNC _ConvertMMXpII32_16BGR565
289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_FUNC _ConvertMMXpII32_16RGB555
299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_FUNC _ConvertMMXpII32_16BGR555
309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall;; Macros for conversion routines
329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
;; ---------------------------------------------------------------------
;; Immediate-quadword helpers
;;
;; These macros build a 64-bit constant on the stack instead of reading
;; it from a data section (presumably so the code needs no absolute data
;; references -- confirm against the build's PIC requirements).
;;
;; Every load_immq / pand_immq leaves 8 bytes on the stack.  The caller
;; must release them with CLEANUP_IMMQ_LOADS(n), where n is the number
;; of load_immq / pand_immq expansions executed since the last cleanup.
;; ---------------------------------------------------------------------

;; _push_immq_mask imm32
;;   Pushes the same dword twice, so [esp] holds a quadword whose low
;;   and high halves both equal imm32.
%macro _push_immq_mask 1
        push    dword %1
        push    dword %1
%endmacro

;; load_immq mmreg, imm32
;;   mmreg = imm32 replicated into both dwords.  Leaves 8 bytes on the stack.
%macro load_immq 2
        _push_immq_mask %2
        movq    %1, [esp]
%endmacro

;; pand_immq mmreg, imm32
;;   mmreg &= imm32 replicated into both dwords.  Leaves 8 bytes on the stack.
%macro pand_immq 2
        _push_immq_mask %2
        pand    %1, [esp]
%endmacro

;; CLEANUP_IMMQ_LOADS(num)
;;   Releases the stack space of `num` pending immediate loads.
;;   `num` is parenthesised so that an expression argument such as
;;   CLEANUP_IMMQ_LOADS(1+1) scales as a whole (16, not 8*1+1).
;;   The `byte` override selects the 8-bit immediate form, so 8*num
;;   has to stay within 127, i.e. num <= 15.
%define CLEANUP_IMMQ_LOADS(num) add esp, byte 8 * (num)
509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
;; Per-pixel (32-bit) constants.  load_immq / pand_immq replicate each
;; value into both dwords of an MMX register, so one constant covers
;; the two pixels held in a quadword.

;; 32 -> 24 bit: keep bytes 0-2 of each pixel, clear byte 3.
%define mmx32_rgb888_mask       0x00ffffff

;; 5-6-5 field masks: top 5 bits of byte 0, top 6 bits of byte 1,
;; top 5 bits of byte 2.
%define mmx32_rgb565_b          0x000000f8
%define mmx32_rgb565_g          0x0000fc00
%define mmx32_rgb565_r          0x00f80000

;; 5-5-5 field masks: top 5 bits of bytes 0 and 2 together, and the
;; top 5 bits of byte 1.
%define mmx32_rgb555_rb         0x00f800f8
%define mmx32_rgb555_g          0x0000f800

;; pmaddwd multipliers (low word : high word of each dword).
;;   rgb555: low word * 0008h, high word * 2000h
;;   bgr555: low word * 2000h, high word * 0008h
%define mmx32_rgb555_mul        0x20000008
%define mmx32_bgr555_mul        0x00082000
609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSECTION .text
629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
;-----------------------------------------------------------------------
; _ConvertMMXpII32_24RGB888
;   Packs 4-byte pixels into 3-byte pixels by dropping byte 3 of each.
; In:    esi = source pixels, edi = destination, ecx = pixel count
; Out:   esi / edi advanced past the converted data
; Clobb: al, bl, ecx, edx, mm0-mm4, mm6, mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:

        load_immq mm6, mmx32_rgb888_mask    ; mm6 = 00ffffffh in both dwords
        CLEANUP_IMMQ_LOADS(1)
        pxor    mm7, mm7                    ; mm7 = 0, used as unpack filler

        mov     edx, ecx                    ; keep the full count for the tail
        and     ecx, 0fffffffch             ; ecx = count rounded down to a multiple of 4
        jz      .tail                       ; fewer than 4 pixels: tail only

.four_pixels:
        ; Load pixels p0..p3 and clear byte 3 of each.
        movq    mm0, [esi]                  ; mm0 = p1 : p0
        movq    mm1, [esi+8]                ; mm1 = p3 : p2
        pand    mm0, mm6
        pand    mm1, mm6

        ; mm0 = destination bytes 0-5 (p0, p1)
        movq    mm2, mm0
        punpckhdq mm2, mm7                  ; mm2 = 0 : p1
        punpckldq mm0, mm7                  ; mm0 = 0 : p0
        psllq   mm2, 24                     ; p1 moved up to bytes 3-5
        por     mm0, mm2

        ; destination bytes 6-7 = bytes 0-1 of p2
        movq    mm3, mm1
        psllq   mm3, 48                     ; low 16 bits of p2 into bytes 6-7
        por     mm0, mm3

        ; mm1 (low dword) = destination bytes 8-11: byte 2 of p2, then p3
        movq    mm4, mm1
        punpckhdq mm4, mm7                  ; mm4 = 0 : p3
        punpckldq mm1, mm7                  ; mm1 = 0 : p2
        psrlq   mm1, 16                     ; byte 2 of p2 down to byte 0
        psllq   mm4, 8                      ; p3 up to bytes 1-3
        por     mm1, mm4

        movq    [edi], mm0                  ; 8 + 4 = 12 output bytes
        movd    [edi+8], mm1
        add     esi, byte 16
        add     edi, byte 12
        sub     ecx, byte 4
        jnz     .four_pixels

.tail:
        mov     ecx, edx
        and     ecx, byte 3                 ; ecx = leftover pixels (0-3)
        jz      .done

.tail_pixel:
        ; Copy bytes 0-2 of one pixel; dl may be reused here because the
        ; leftover count already lives in ecx.
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, byte 4
        add     edi, byte 3
        dec     ecx
        jnz     .tail_pixel

.done:
        retn
1239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16RGB565
;   Converts 4-byte pixels to 16-bit 5-6-5 pixels:
;     bits  0-4  = top 5 bits of source byte 0
;     bits  5-10 = top 6 bits of source byte 1
;     bits 11-15 = top 5 bits of source byte 2
; In:    esi = source pixels, edi = destination, ecx = pixel count
; Out:   esi / edi advanced past the converted data
; Clobb: eax, ebx, ecx, edx, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:

        ; mm5 / mm6 / mm7 = byte 0 / byte 1 / byte 2 field masks
        load_immq mm5, mmx32_rgb565_b
        load_immq mm6, mmx32_rgb565_g
        load_immq mm7, mmx32_rgb565_r
        CLEANUP_IMMQ_LOADS(3)

        mov     edx, ecx                ; keep the full count for the tail
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      .tail

.four_pixels:
        ; --- pixels 0 and 1 ---
        movq    mm0, [esi]
        movq    mm1, mm0
        movq    mm3, mm0
        pand    mm0, mm6                ; byte 1 field, bits 10-15
        pand    mm1, mm5                ; byte 0 field, bits 3-7
        pand    mm3, mm7                ; byte 2 field, bits 19-23
        pslld   mm1, 2                  ; byte 0 field -> bits 5-9
        por     mm0, mm1
        psrld   mm0, 5                  ; -> bits 5-10 | bits 0-4

        ; --- pixels 2 and 3 ---
        movq    mm4, [esi+8]
        movq    mm2, mm4
        movq    mm1, mm4
        pand    mm4, mm6
        pand    mm2, mm5
        pand    mm1, mm7
        pslld   mm2, 2
        por     mm4, mm2
        psrld   mm4, 5

        ; Each masked byte-2 dword is 00xx0000h; packing its words to
        ; bytes yields the word xx00h, i.e. the field lands in bits 11-15.
        packuswb mm3, mm1
        packssdw mm0, mm4               ; four low-field words
        por     mm0, mm3
        movq    [edi], mm0

        add     esi, 16
        add     edi, 8
        dec     ecx
        jnz     .four_pixels

.tail:
        mov     ecx, edx
        and     ecx, byte 3             ; ecx = leftover pixels (0-3)
        jz      .done

.tail_pixel:
        mov     al, [esi]               ; byte 0 -> low field
        mov     bh, [esi+1]             ; byte 1 -> middle field
        mov     ah, [esi+2]             ; byte 2 -> high field
        shr     al, 3
        and     eax, 0F81Fh             ; keep bits 11-15 and 0-4
        shr     ebx, 5
        and     ebx, 07E0h              ; keep bits 5-10 (drops stale bl / upper ebx)
        add     eax, ebx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .tail_pixel

.done:
        retn
1929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR565
;   Converts 4-byte pixels to 16-bit 5-6-5 pixels with the outer fields
;   swapped relative to the RGB565 converter:
;     bits  0-4  = top 5 bits of source byte 2
;     bits  5-10 = top 6 bits of source byte 1
;     bits 11-15 = top 5 bits of source byte 0
; In:    esi = source pixels, edi = destination, ecx = pixel count
; Out:   esi / edi advanced past the converted data
; Clobb: eax, ebx, ecx, edx, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:

        ; mm5 / mm6 / mm7 = byte 2 / byte 1 / byte 0 field masks
        load_immq mm5, mmx32_rgb565_r
        load_immq mm6, mmx32_rgb565_g
        load_immq mm7, mmx32_rgb565_b
        CLEANUP_IMMQ_LOADS(3)

        mov     edx, ecx                ; keep the full count for the tail
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      .tail

.four_pixels:
        ; --- pixels 0 and 1 ---
        movq    mm0, [esi]
        movq    mm1, mm0
        movq    mm3, mm0
        pand    mm0, mm6                ; byte 1 field, bits 10-15
        pand    mm1, mm5                ; byte 2 field, bits 19-23
        pand    mm3, mm7                ; byte 0 field, bits 3-7

        psllq   mm3, 16                 ; byte 0 field -> bits 19-23 of its dword
        psrld   mm1, 14                 ; byte 2 field -> bits 5-9
        por     mm0, mm1
        psrld   mm0, 5                  ; -> bits 5-10 | bits 0-4

        ; --- pixels 2 and 3 ---
        movq    mm4, [esi+8]
        movq    mm2, mm4
        movq    mm1, mm4
        pand    mm4, mm6
        pand    mm2, mm5
        pand    mm1, mm7

        psllq   mm1, 16
        psrld   mm2, 14
        por     mm4, mm2
        psrld   mm4, 5

        ; Each shifted byte-0 dword is 00xx0000h; packing its words to
        ; bytes yields the word xx00h, i.e. the field lands in bits 11-15.
        packuswb mm3, mm1
        packssdw mm0, mm4               ; four low-field words
        por     mm0, mm3
        movq    [edi], mm0

        add     esi, byte 16
        add     edi, byte 8
        dec     ecx
        jnz     .four_pixels

.tail:
        and     edx, byte 3             ; edx = leftover pixels (0-3)
        jz      .done

.tail_pixel:
        mov     al, [esi+2]             ; byte 2 -> low field
        mov     bh, [esi+1]             ; byte 1 -> middle field
        mov     ah, [esi]               ; byte 0 -> high field
        shr     al, 3
        and     eax, 0F81Fh             ; keep bits 11-15 and 0-4
        shr     ebx, 5
        and     ebx, 07E0h              ; keep bits 5-10 (drops stale bl / upper ebx)
        add     eax, ebx
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, byte 4
        add     edi, byte 2
        dec     edx
        jnz     .tail_pixel

.done:
        retn
2629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR555 / _ConvertMMXpII32_16RGB555
;   Convert 4-byte pixels to 16-bit 5-5-5 pixels.
;
;   Both entry points share one body; they differ only in the pmaddwd
;   multiplier loaded into mm7, which decides where the top 5 bits of
;   source bytes 0 and 2 end up:
;
;                 byte 0 field   byte 1 field   byte 2 field
;     RGB555      bits  0-4      bits 5-9       bits 10-14
;     BGR555      bits 10-14     bits 5-9       bits  0-4
;
;   Per pixel p:
;     out = ((p & mmx32_rgb555_g) | pmaddwd(p & mmx32_rgb555_rb, mm7)) >> 6
;
;   The method follows the 8-pixel converter from the Intel MMX
;   tutorial mentioned in the file header, written here as a plain
;   (not software-pipelined) 4-pixel loop.  ('mmx32_rgb555_mul' is the
;   constant the Intel code called 'mmx32_rgb555_add'.)
;
;   Two properties this layout guarantees:
;     * the leftover pixels go through the same mask / multiply / shift
;       sequence as the main loop, so they honour the multiplier chosen
;       by the entry point (BGR555 leftovers are BGR555, not RGB555);
;     * a source quadword is loaded only when its pixels are about to be
;       converted, so nothing beyond `ecx` source pixels is ever read.
;
; In:    esi = source pixels, edi = destination, ecx = pixel count
; Out:   esi / edi advanced past the converted data
; Clobb: eax, ecx, edx, mm0-mm3, mm5-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:

        load_immq mm7, mmx32_bgr555_mul     ; pending stack load, released below
        jmp     _convert_bgr555_cheat

_ConvertMMXpII32_16RGB555:

        load_immq mm7, mmx32_rgb555_mul     ; pending stack load, released below

_convert_bgr555_cheat:
        load_immq mm6, mmx32_rgb555_g       ; mm6 = byte 1 field mask
        load_immq mm5, mmx32_rgb555_rb      ; mm5 = byte 0 + byte 2 field mask
        CLEANUP_IMMQ_LOADS(3)               ; mm7 + mm6 + mm5 loads

        mov     edx, ecx                    ; keep the full count for the tail
        shr     ecx, 2                      ; ecx = number of 4-pixel groups
        jz      .tail

.four_pixels:
        movq    mm0, [esi]                  ; pixels 0,1
        movq    mm2, [esi+8]                ; pixels 2,3
        movq    mm1, mm0
        movq    mm3, mm2

        pand    mm1, mm5                    ; byte 0 / byte 2 fields, one per word
        pand    mm3, mm5
        pmaddwd mm1, mm7                    ; scale each word into place and sum
        pmaddwd mm3, mm7

        pand    mm0, mm6                    ; byte 1 field, bits 11-15
        pand    mm2, mm6
        por     mm1, mm0
        por     mm3, mm2

        psrld   mm1, 6                      ; fields now occupy bits 0-14
        psrld   mm3, 6
        packssdw mm1, mm3                   ; values <= 7fffh, so no saturation
        movq    [edi], mm1

        add     esi, byte 16
        add     edi, byte 8
        dec     ecx
        jnz     .four_pixels

.tail:
        and     edx, byte 3                 ; edx = leftover pixels (0-3)
        jz      .done

.tail_pixel:
        movd    mm0, [esi]                  ; one pixel, upper dword zeroed
        movq    mm1, mm0
        pand    mm1, mm5
        pmaddwd mm1, mm7                    ; same multiplier as the main loop
        pand    mm0, mm6
        por     mm0, mm1
        psrld   mm0, 6
        movd    eax, mm0
        mov     [edi], ax

        add     esi, byte 4
        add     edi, byte 2
        dec     edx
        jnz     .tail_pixel

.done:
        retn
4029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
4039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall%ifidn __OUTPUT_FORMAT__,elf32
4049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallsection .note.GNU-stack noalloc noexec nowrite progbits
4059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall%endif
406