19682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* 29682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL - Simple DirectMedia Layer 39682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Copyright (C) 1997-2012 Sam Lantinga 49682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 59682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This library is free software; you can redistribute it and/or 69682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall modify it under the terms of the GNU Lesser General Public 79682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall License as published by the Free Software Foundation; either 89682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall version 2.1 of the License, or (at your option) any later version. 99682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall This library is distributed in the hope that it will be useful, 119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall but WITHOUT ANY WARRANTY; without even the implied warranty of 129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Lesser General Public License for more details. 149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall You should have received a copy of the GNU Lesser General Public 169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall License along with this library; if not, write to the Free Software 179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Sam Lantinga 209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall slouken@libsdl.org 219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_config.h" 239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_video.h" 259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_blit.h" 269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* 289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall In Visual C, VC6 has mmintrin.h in the "Processor Pack" add-on. 299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Checking if _mm_free is #defined in malloc.h is is the only way to 309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall determine if the Processor Pack is installed, as far as I can tell. 319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ASSEMBLY_ROUTINES 349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* forced MMX to 0...it breaks on most compilers now. --ryan. */ 369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define MMX_ASMBLIT 0 379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define GCC_ASMBLIT 0 389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# elif defined(_MSC_VER) && defined(_M_IX86) 399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# if (_MSC_VER <= 1200) 409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# include <malloc.h> 419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# if defined(_mm_free) 429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define HAVE_MMINTRIN_H 1 439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# endif 449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# else /* Visual Studio > VC6 always has mmintrin.h */ 459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define HAVE_MMINTRIN_H 1 469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# endif 479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# if HAVE_MMINTRIN_H 489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define MMX_ASMBLIT 1 499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# define MSVC_ASMBLIT 1 509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# endif 519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall# endif 529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif /* SDL_ASSEMBLY_ROUTINES */ 539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Function to check the CPU flags */ 559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_cpuinfo.h" 569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if GCC_ASMBLIT 579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "mmx.h" 589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#elif MSVC_ASMBLIT 599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include <mmintrin.h> 609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include <mm3dnow.h> 619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Functions to perform alpha blended blitting */ 649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* N->1 blending with per-surface alpha */ 669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) 679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *palmap = info->table; 759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp = srcfmt->BytesPerPixel; 789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const unsigned A = srcfmt->alpha; 809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4( 839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR = dstfmt->palette->colors[*dst].r; 939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG = dstfmt->palette->colors[*dst].g; 949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB = dstfmt->palette->colors[*dst].b; 959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); 969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR &= 0xff; 979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG &= 0xff; 989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB &= 0xff; 999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Pack RGB into 8bit pixel */ 1009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if ( palmap == NULL ) { 1019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst =((dR>>5)<<(3+2))| 1029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2))| 1039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0)); 1049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 1059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst = palmap[((dR>>5)<<(3+2))| 1069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2)) | 1079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0))]; 1089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst++; 1109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 1119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 1129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 1139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 1149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 1159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* N->1 blending with pixel alpha */ 1199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNto1PixelAlpha(SDL_BlitInfo *info) 1209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 1219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 1229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 1239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 1249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 1259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 1269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 1279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *palmap = info->table; 1289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 1299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 1309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp = srcfmt->BytesPerPixel; 1319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: fix alpha bit field expansion here too? */ 1339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 1349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4( 1359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 1369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 1379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 1389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 1399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 1409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA; 1419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 1429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 1439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 1449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); 1459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR = dstfmt->palette->colors[*dst].r; 1469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG = dstfmt->palette->colors[*dst].g; 1479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB = dstfmt->palette->colors[*dst].b; 1489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 1499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR &= 0xff; 1509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG &= 0xff; 1519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB &= 0xff; 1529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Pack RGB into 8bit pixel */ 1539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if ( palmap == NULL ) { 1549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst =((dR>>5)<<(3+2))| 1559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2))| 1569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0)); 1579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 1589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst = palmap[((dR>>5)<<(3+2))| 1599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2)) | 1609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0)) ]; 1619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst++; 1639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 1649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 1659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 1669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 1679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 1689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 1699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 1709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* colorkeyed N->1 blending with per-surface alpha */ 1729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info) 1739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 1749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 1759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 1769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 1779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 1789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 1799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 1809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *palmap = info->table; 1819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 1829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 1839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp = srcfmt->BytesPerPixel; 1849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 ckey = srcfmt->colorkey; 1859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const int A = srcfmt->alpha; 1879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 1889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 1899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP( 1909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 1919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 1929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 1939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 1949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 1959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 1969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 1979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 1989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 1999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if ( Pixel != ckey ) { 2009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR = dstfmt->palette->colors[*dst].r; 2019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG = dstfmt->palette->colors[*dst].g; 2029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB = dstfmt->palette->colors[*dst].b; 2039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, A, dR, dG, dB); 2049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR &= 0xff; 2059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG &= 0xff; 2069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB &= 0xff; 2079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Pack RGB into 8bit pixel */ 2089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if ( palmap == NULL ) { 2099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst =((dR>>5)<<(3+2))| 2109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2)) | 2119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0)); 2129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 2139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dst = palmap[((dR>>5)<<(3+2))| 2149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dG>>5)<<(2)) | 2159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dB>>6)<<(0)) ]; 2169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst++; 2199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 2209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 2219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 2229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 2239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 2249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 2269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if GCC_ASMBLIT 2289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 2299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 2309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 2319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 2329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 2339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 2349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 2359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 2369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 2379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dalpha = info->dst->Amask; 2389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint64 load; 2399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x00fefefe00fefefeULL;/* alpha128 mask */ 2419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm4); /* alpha128 mask -> mm4 */ 2429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x0001010100010101ULL;/* !alpha128 mask */ 2439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm3); /* !alpha128 mask -> mm3 */ 2449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r(dalpha, mm7); /* dst alpha mask */ 2459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 2469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 2479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_DOUBLE2( 2489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 2499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp++; 2509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 2519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 2529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s & d & 0x00010101)) | dalpha; 2539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 2549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ 2559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ 2569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*srcp), mm1);/* 2 x src -> mm1(ARGBARGB) */ 2589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm1, mm5); /* 2 x src -> mm5(ARGBARGB) */ 2599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm6); /* dst & mask -> mm6 */ 2619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm5); /* src & mask -> mm5 */ 2629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddd_r2r(mm6, mm5); /* mm6 + mm5 -> mm5 */ 2639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm1, mm2); /* src & dst -> mm2 */ 2649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrld_i2r(1, mm5); /* mm5 >> 1 -> mm5 */ 2659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm3, mm2); /* mm2 & !mask -> mm2 */ 2669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddd_r2r(mm5, mm2); /* mm5 + mm2 -> mm2 */ 2679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 2699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2m(mm2, (*dstp));/* mm2 -> 2 x dst pixels */ 2709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 2719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 2729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 2739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 2749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 2759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 2769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall emms(); 2779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 2789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha */ 2809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 2819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 2829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* df = info->dst; 2839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 2849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 2869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* only call a128 version when R,G,B occupy lower bits */ 2879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall BlitRGBtoRGBSurfaceAlpha128MMX(info); 2889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 2899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 2909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 2919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 2929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 2939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 2949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 2959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pxor_r2r(mm5, mm5); /* 0 -> mm5 */ 2979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* form the alpha mult */ 2989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r(alpha, mm4); /* 0000000A -> mm4 */ 2999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ 3009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpckldq_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ 3019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 3029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r(alpha, mm0); /* 00000FFF -> mm0 */ 3039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm0, mm0); /* 00FFFFFF -> mm0 */ 3049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm0, mm4); /* 0A0A0A0A -> mm4, minus 1 chan */ 3059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* at this point mm4 can be 000A0A0A or 0A0A0A00 or another combo */ 3069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r(df->Amask, mm7); /* dst alpha mask */ 3079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpckldq_r2r(mm7, mm7); /* dst alpha mask | dst alpha mask -> mm7 */ 3089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 3109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_DOUBLE2({ 3119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* One Pixel Blend */ 3129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ 3139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ 3149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm5, mm1); /* 0A0R0G0B -> mm1(src) */ 3159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm5, mm2); /* 0A0R0G0B -> mm2(dst) */ 3169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ 3189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ 3199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ 3209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ 3219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall packuswb_r2r(mm5, mm2); /* ARGBARGB -> mm2 */ 3239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm7, mm2); /* mm7(full alpha) | mm2 -> mm2 */ 3249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_r2m(mm2, *dstp);/* mm2 -> pixel */ 3259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 3269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 3279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 3289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Two Pixels Blend */ 3299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*srcp), mm0);/* 2 x src -> mm0(ARGBARGB)*/ 3309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*dstp), mm2);/* 2 x dst -> mm2(ARGBARGB) */ 3319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm0, mm1); /* 2 x src -> mm1(ARGBARGB) */ 3329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm6); /* 2 x dst -> mm6(ARGBARGB) */ 3339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm5, mm0); /* low - 0A0R0G0B -> mm0(src1) */ 3359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpckhbw_r2r(mm5, mm1); /* high - 0A0R0G0B -> mm1(src2) */ 3369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm5, mm2); /* low - 0A0R0G0B -> mm2(dst1) */ 3379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpckhbw_r2r(mm5, mm6); /* high - 0A0R0G0B -> mm6(dst2) */ 3389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm2, mm0);/* src1 - dst1 -> mm0 */ 3409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm4, mm0); /* mm0 * alpha -> mm0 */ 3419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(8, mm0); /* mm0 >> 8 -> mm1 */ 3429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddb_r2r(mm0, mm2); /* mm0 + mm2(dst1) -> mm2 */ 3439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm1);/* src2 - dst2 -> mm1 */ 3459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ 3469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1 */ 3479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddb_r2r(mm1, mm6); /* mm1 + mm6(dst2) -> mm6 */ 3489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall packuswb_r2r(mm6, mm2); /* ARGBARGB -> mm2 */ 3509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm7, mm2); /* mm7(dst alpha) | mm2 -> mm2 */ 3519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2m(mm2, *dstp);/* mm2 -> 2 x pixel */ 3539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 3559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 3569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 3579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 3589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 3599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 3609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall emms(); 3619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 3629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 3639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 3659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 3669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 3679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 3689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 3699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 3709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 3719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 3729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 3739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* sf = info->src; 3749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amask = sf->Amask; 3759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pxor_r2r(mm6, mm6); /* 0 -> mm6 */ 3779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* form multiplication mask */ 3789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r(sf->Amask, mm7); /* 0000F000 -> mm7 */ 3799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm7, mm7); /* FF000000 -> mm7 */ 3809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pcmpeqb_r2r(mm0, mm0); /* FFFFFFFF -> mm0 */ 3819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm0, mm3); /* FFFFFFFF -> mm3 (for later) */ 3829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pxor_r2r(mm0, mm7); /* 00FFFFFF -> mm7 (mult mask) */ 3839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* form channel masks */ 3849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm7, mm0); /* 00FFFFFF -> mm0 */ 3859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall packsswb_r2r(mm6, mm0); /* 00000FFF -> mm0 (channel mask) */ 3869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall packsswb_r2r(mm6, mm3); /* 0000FFFF -> mm3 */ 3879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pxor_r2r(mm0, mm3); /* 0000F000 -> mm3 (~channel mask) */ 3889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* get alpha channel shift */ 3899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ __volatile__ ( 3909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %0, %%mm5" 3919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : : "rm" ((Uint32) sf->Ashift) ); /* Ashift -> mm5 */ 3929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 3939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 3949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 3959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha = *srcp & amask; 3969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: Here we special-case opaque alpha since the 3979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall compositioning used (>>8 instead of /255) doesn't handle 3989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall it correctly. Also special-case alpha=0 for speed? 3999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Benchmark this! */ 4009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 0) { 4019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* do nothing */ 4029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else if(alpha == amask) { 4039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* opaque alpha -- copy RGB, keep dst alpha */ 4049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* using MMX here to free up regular registers for other things */ 4059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ 4069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ 4079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm0, mm1); /* src & chanmask -> mm1 */ 4089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm3, mm2); /* dst & ~chanmask -> mm2 */ 4099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm1, mm2); /* src | dst -> mm2 */ 4109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_r2m(mm2, (*dstp)); /* mm2 -> dst */ 4119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 4129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*srcp), mm1);/* src(ARGB) -> mm1 (0000ARGB)*/ 4139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm6, mm1); /* 0A0R0G0B -> mm1 */ 4149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_m2r((*dstp), mm2);/* dst(ARGB) -> mm2 (0000ARGB)*/ 4169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklbw_r2r(mm6, mm2); /* 0A0R0G0B -> mm2 */ 4179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ __volatile__ ( 4199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %0, %%mm4" 4209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : : "r" (alpha) ); /* 0000A000 -> mm4 */ 4219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrld_r2r(mm5, mm4); /* mm4 >> mm5 -> mm4 (0000000A) */ 4229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm4, mm4); /* 00000A0A -> mm4 */ 4239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm4, mm4); /* 0A0A0A0A -> mm4 */ 4249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm4); /* 000A0A0A -> mm4, preserve dst alpha on add */ 4259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 4279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm2, mm1);/* src - dst -> mm1 */ 4289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm4, mm1); /* mm1 * alpha -> mm1 */ 4299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(8, mm1); /* mm1 >> 8 -> mm1(000R0G0B) */ 4309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddb_r2r(mm1, mm2); /* mm1 + mm2(dst) -> mm2 */ 4319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall packuswb_r2r(mm6, mm2); /* 0000ARGB -> mm2 */ 4339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movd_r2m(mm2, *dstp);/* mm2 -> dst */ 4349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 4359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 4369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 4379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 4389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 4399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 4409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 4419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall emms(); 4429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 4439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* End GCC_ASMBLIT */ 4449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#elif MSVC_ASMBLIT 4469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 4479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info) 4489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 4499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 4509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 4519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 4529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 4539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 4549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 4559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dalpha = info->dst->Amask; 4569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, src2, dst1, dst2, lmask, hmask, dsta; 4589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */ 4609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */ 4619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 4629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (height--) { 4649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int n = width; 4659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if ( n & 1 ) { 4669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp++; 4679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 4689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 4699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s & d & 0x00010101)) | dalpha; 4709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall n--; 4719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 4729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall for (n >>= 1; n > 0; --n) { 4749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = *(__m64*)dstp; /* 2 x dst -> dst1(ARGBARGB) */ 4759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 4769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB) */ 4789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 4799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */ 4819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */ 4829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */ 4839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */ 4849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */ 4869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */ 4879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */ 4889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */ 4899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(__m64*)dstp = dst1; /* dst1 -> 2 x dst pixels */ 4919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 4929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 4939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 4949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 4959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 4969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 4979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 4989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 4999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 5009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha */ 5029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo *info) 5039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 5049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* df = info->dst; 5059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 chanmask = df->Rmask | df->Gmask | df->Bmask; 5069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 5079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) { 5099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* only call a128 version when R,G,B occupy lower bits */ 5109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall BlitRGBtoRGBSurfaceAlpha128MMX(info); 5119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 5129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 5139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 5149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 5159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 5169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 5179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 5189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dalpha = df->Amask; 5199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amult; 5209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta; 5229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 5249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* form the alpha mult */ 5259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall amult = alpha | (alpha << 8); 5269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall amult = amult | (amult << 16); 5279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall chanmask = (0xff << df->Rshift) | (0xff << df->Gshift) | (0xff << df->Bshift); 5289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */ 5299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */ 5309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */ 5319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */ 5329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (height--) { 5349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int n = width; 5359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (n & 1) { 5369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* One Pixel Blend */ 5379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB)*/ 5389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */ 5399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ 5419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 5429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */ 5449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 5459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 5469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */ 5479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 5499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 5509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 5519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 5539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 5549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall n--; 5569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 5579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall for (n >>= 1; n > 0; --n) { 5599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Two Pixels Blend */ 5609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = *(__m64*)srcp; /* 2 x src -> src1(ARGBARGB)*/ 5619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; /* 2 x src -> src2(ARGBARGB) */ 5629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */ 5639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */ 5649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = *(__m64*)dstp;/* 2 x dst -> dst1(ARGBARGB) */ 5669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */ 5679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */ 5689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */ 5699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ 5719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */ 5729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */ 5739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */ 5749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src2 - dst2 -> src2 */ 5769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 5779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */ 5789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */ 5799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */ 5819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */ 5829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(__m64*)dstp = dst1; /* dst1 -> 2 x pixel */ 5849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 5869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 5879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 5889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 5899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 5909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 5919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 5929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 5939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 5949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 5959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 5969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info) 5979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 5989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 5999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 6009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 6019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 6029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 6039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 6049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* sf = info->src; 6059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 6069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amask = sf->Amask; 6079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 ashift = sf->Ashift; 6089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint64 multmask; 6099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, dst1, mm_alpha, mm_zero, dmask; 6119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 6139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall multmask = ~(0xFFFFi64 << (ashift * 2)); 6149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 6159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 6179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 6189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha = *srcp & amask; 6199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (alpha == 0) { 6209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* do nothing */ 6219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else if (alpha == amask) { 6229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* opaque alpha -- copy RGB, keep dst alpha */ 6239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); 6249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 6259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ 6269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ 6279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ 6299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 6309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ 6329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ 6339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 6349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 6359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ 6369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 6389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_sub_pi16(src1, dst1);/* src1 - dst1 -> src1 */ 6399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src1 - dst1) * alpha -> src1 */ 6409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ 6419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1 -> dst1(0A0R0G0B) */ 6429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 6439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 6459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 6469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 6479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 6489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 6499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 6509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 6519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 6529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 6539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 6549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* End MSVC_ASMBLIT */ 6559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 6579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 6599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if __MWERKS__ 6609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#pragma altivec_model on 6619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 6629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if HAVE_ALTIVEC_H 6639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include <altivec.h> 6649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 6659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include <assert.h> 6669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if (defined(__MACOSX__) && (__GNUC__ < 4)) 6689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 6699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p ) 6709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 6719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (vector unsigned short) ( a,b,c,d,e,f,g,h ) 6729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else 6739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ 6749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p } 6759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \ 6769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (vector unsigned short) { a,b,c,d,e,f,g,h } 6779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 6789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F) 6809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VECPRINT(msg, v) do { \ 6819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int tmpvec = (vector unsigned int)(v); \ 6829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned int *vp = (unsigned int *)&tmpvec; \ 6839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \ 6849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} while (0) 6859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 6869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* the permuation vector that takes the high bytes out of all the appropriate shorts 6879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (vector unsigned char)( 6889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x10, 0x02, 0x12, 6899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x04, 0x14, 0x06, 0x16, 6909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x08, 0x18, 0x0A, 0x1A, 6919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x0C, 0x1C, 0x0E, 0x1E ); 6929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall*/ 6939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F))) 6949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12))) 6959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24())) 6969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \ 6979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ? vec_lvsl(0, src) \ 6989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : vec_add(vec_lvsl(8, src), vec_splat_u8(8))) 6999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \ 7029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp1 contains source AAGGAAGGAAGGAAGG */ \ 7039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vtemp1 = vec_mule(vs, valpha); \ 7049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp2 contains source RRBBRRBBRRBBRRBB */ \ 7059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vtemp2 = vec_mulo(vs, valpha); \ 7069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* valpha2 is 255-alpha */ \ 7079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha2 = vec_nor(valpha, valpha); \ 7089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \ 7099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vtemp3 = vec_mule(vd, valpha2); \ 7109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \ 7119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \ 7129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* add source and dest */ \ 7139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp1 = vec_add(vtemp1, vtemp3); \ 7149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp2 = vec_add(vtemp2, vtemp4); \ 7159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \ 7169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp1 = vec_add(vtemp1, v1_16); \ 7179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp3 = vec_sr(vtemp1, v8_16); \ 7189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp1 = vec_add(vtemp1, vtemp3); \ 7199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \ 7209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp2 = vec_add(vtemp2, v1_16); \ 7219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp4 = vec_sr(vtemp2, v8_16); \ 7229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vtemp2 = vec_add(vtemp2, vtemp4); \ 7239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* (>>8) and get ARGBARGBARGBARGB */ \ 7249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \ 7259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} while (0) 7269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* Calculate the permute vector used for 32->32 swizzling */ 7289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt, 7299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const SDL_PixelFormat *dstfmt) 7309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 7319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 7329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * We have to assume that the bits that aren't used by other 7339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * colors is alpha, and it's one complete byte, since some formats 7349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * leave alpha with a zero mask, but we should still swizzle the bits. 7359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 7369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* ARGB */ 7379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const static struct SDL_PixelFormat default_pixel_format = { 7389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall NULL, 0, 0, 7399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0, 0, 0, 0, 7409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16, 8, 0, 24, 7419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 7429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0, 0}; 7439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (!srcfmt) { 7449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcfmt = &default_pixel_format; 7459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 7469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (!dstfmt) { 7479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstfmt = &default_pixel_format; 7489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 7499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall const vector unsigned char plus = VECUINT8_LITERAL 7509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ( 0x00, 0x00, 0x00, 0x00, 7519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x04, 0x04, 0x04, 0x04, 7529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x08, 0x08, 0x08, 0x08, 7539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x0C, 0x0C, 0x0C, 0x0C ); 7549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vswiz; 7559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int srcvec; 7569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RESHIFT(X) (3 - ((X) >> 3)) 7579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift); 7589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift); 7599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift); 7609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amask; 7619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Use zero for alpha if either surface doesn't have alpha */ 7629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (dstfmt->Amask) { 7639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift); 7649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 7659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF); 7669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 7679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef RESHIFT 7689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask); 7699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0)); 7709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return(vswiz); 7719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 7729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit32to565PixelAlphaAltivec(SDL_BlitInfo *info) 7749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 7759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 7769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = (Uint8 *)info->s_pixels; 7779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 7789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = (Uint8 *)info->d_pixels; 7799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 7809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 7819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char v0 = vec_splat_u8(0); 7839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8_16 = vec_splat_u16(8); 7849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1_16 = vec_splat_u16(1); 7859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v2_16 = vec_splat_u16(2); 7869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v3_16 = vec_splat_u16(3); 7879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int v8_32 = vec_splat_u32(8); 7889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int v16_32 = vec_add(v8_32, v8_32); 7899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v3f = VECUINT16_LITERAL( 7909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x003f, 0x003f, 0x003f, 0x003f, 7919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x003f, 0x003f, 0x003f, 0x003f); 7929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vfc = VECUINT16_LITERAL( 7939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00fc, 0x00fc, 0x00fc, 0x00fc, 7949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00fc, 0x00fc, 0x00fc, 0x00fc); 7959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 7969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 7979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10 - 0x1f is the alpha 7989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00 - 0x0e evens are the red 7999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x01 - 0x0f odds are zero 8009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 8019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vredalpha1 = VECUINT8_LITERAL( 8029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10, 0x00, 0x01, 0x01, 8039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10, 0x02, 0x01, 0x01, 8049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10, 0x04, 0x01, 0x01, 8059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10, 0x06, 0x01, 0x01 8069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vredalpha2 = (vector unsigned char)( 8089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_add((vector unsigned int)vredalpha1, vec_sl(v8_32, v16_32)) 8099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 8119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00 - 0x0f is ARxx ARxx ARxx ARxx 8129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x11 - 0x0f odds are blue 8139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 8149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vblue1 = VECUINT8_LITERAL( 8159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x01, 0x02, 0x11, 8169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x04, 0x05, 0x06, 0x13, 8179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x08, 0x09, 0x0a, 0x15, 8189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x0c, 0x0d, 0x0e, 0x17 8199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vblue2 = (vector unsigned char)( 8219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_add((vector unsigned int)vblue1, v8_32) 8229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 8249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00 - 0x0f is ARxB ARxB ARxB ARxB 8259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x10 - 0x0e evens are green 8269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 8279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vgreen1 = VECUINT8_LITERAL( 8289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x01, 0x10, 0x03, 8299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x04, 0x05, 0x12, 0x07, 8309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x08, 0x09, 0x14, 0x0b, 8319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x0c, 0x0d, 0x16, 0x0f 8329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vgreen2 = (vector unsigned char)( 8349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_add((vector unsigned int)vgreen1, vec_sl(v8_32, v8_32)) 8359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); 8369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vgmerge = VECUINT8_LITERAL( 8379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x02, 0x00, 0x06, 8389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x0a, 0x00, 0x0e, 8399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x12, 0x00, 0x16, 8409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 0x00, 0x1a, 0x00, 0x1e); 8419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute = VEC_MERGE_PERMUTE(); 8429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL); 8439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 8449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 8459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7); 8469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vf800 = vec_sl(vf800, vec_splat_u16(8)); 8479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 8489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 8499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth; 8509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner; 8519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsrc; 8529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 8539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 8549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 8559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) \ 8569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (condition) { \ 8579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; \ 8589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR, sG, sB, dR, dG, dB, sA; \ 8599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \ 8609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA) { \ 8619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned short dstpixel = *((unsigned short *)dst); \ 8629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dR = (dstpixel >> 8) & 0xf8; \ 8639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dG = (dstpixel >> 3) & 0xfc; \ 8649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dB = (dstpixel << 3) & 0xf8; \ 8659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 8669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *((unsigned short *)dst) = ( \ 8679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \ 8689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ); \ 8699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } \ 8709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += 4; \ 8719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += 2; \ 8729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 8739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 8749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width); 8759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall extrawidth = (width % 8); 8769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valigner = VEC_ALIGNER(src); 8779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrc = (vector unsigned char)vec_ld(0, src); 8789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 8799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 8809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 8819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsrc1, vsrc2; 8829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdst1, vdst2; 8839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vR, vG, vB; 8849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short vpixel, vrpixel, vgpixel, vbpixel; 8859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 8869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Load 8 pixels from src as ARGB */ 8879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, src); 8889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrc = vec_perm(vsrc, voverflow, valigner); 8899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrc1 = vec_perm(vsrc, vsrc, vpermute); 8909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += 16; 8919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrc = (vector unsigned char)vec_ld(15, src); 8929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = vec_perm(voverflow, vsrc, valigner); 8939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrc2 = vec_perm(voverflow, voverflow, vpermute); 8949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += 16; 8959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 8969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Load 8 pixels from dst as XRGB */ 8979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = vec_ld(0, dst); 8989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vR = vec_and((vector unsigned short)voverflow, vf800); 8999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vB = vec_sl((vector unsigned short)voverflow, v3_16); 9009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vG = vec_sl(vB, v2_16); 9019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha1); 9029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1); 9039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1); 9049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, (vector unsigned char)vR, vredalpha2); 9059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2); 9069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2); 9079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Alpha blend 8 pixels as ARGB */ 9099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_perm(vsrc1, v0, valphaPermute); 9109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16, v8_16); 9119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_perm(vsrc2, v0, valphaPermute); 9129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16, v8_16); 9139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Convert 8 pixels to 565 */ 9159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vpixel = (vector unsigned short)vec_packpx((vector unsigned int)vdst1, (vector unsigned int)vdst2); 9169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vgpixel = (vector unsigned short)vec_perm(vdst1, vdst2, vgmerge); 9179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vgpixel = vec_and(vgpixel, vfc); 9189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vgpixel = vec_sl(vgpixel, v3_16); 9199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vrpixel = vec_sl(vpixel, v1_16); 9209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vrpixel = vec_and(vrpixel, vf800); 9219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vbpixel = vec_and(vpixel, v3f); 9229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst1 = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel); 9239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdst1 = vec_or(vdst1, (vector unsigned char)vbpixel); 9249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Store 8 pixels */ 9269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st(vdst1, 0, dst); 9279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 8; 9299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += 16; 9309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 9319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 9329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 9339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 9349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 9359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 9369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 9379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo *info) 9399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 9409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 9419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 9429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 9439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 9449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 9459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 9469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 9479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 9489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA = srcfmt->alpha; 9499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 9509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; 9519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 ckey = info->src->colorkey; 9529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute; 9539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsrcPermute; 9549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdstPermute; 9559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsdstPermute; 9569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 9579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphamask; 9589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vbits; 9599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char v0; 9609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1; 9619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8; 9629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int vckey; 9639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned int vrgbmask; 9649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mergePermute = VEC_MERGE_PERMUTE(); 9669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v0 = vec_splat_u8(0); 9679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v1 = vec_splat_u16(1); 9689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v8 = vec_splat_u16(8); 9699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha to 255 on the destination surf */ 9719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphamask = VEC_ALPHA_MASK(); 9729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrcPermute = calc_swizzle32(srcfmt, NULL); 9749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdstPermute = calc_swizzle32(NULL, dstfmt); 9759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsdstPermute = calc_swizzle32(dstfmt, NULL); 9769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set a vector full of alpha and 255-alpha */ 9789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned char *)&valpha)[0] = alpha; 9799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_splat(valpha, 0); 9809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vbits = (vector unsigned char)vec_splat_s8(-1); 9819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ckey &= rgbmask; 9839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned int *)(char*)&vckey)[0] = ckey; 9849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vckey = vec_splat(vckey, 0); 9859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask; 9869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vrgbmask = vec_splat(vrgbmask, 0); 9879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 9889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 9899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 9909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) \ 9919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (condition) { \ 9929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; \ 9939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR, sG, sB, dR, dG, dB; \ 9949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \ 9959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA && Pixel != ckey) { \ 9969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \ 9979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ 9989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 9999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 10009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } \ 10019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; \ 10029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; \ 10039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 10049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 10059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 10069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (width > 0) { 10079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth = (width % 4); 10089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner = VEC_ALIGNER(srcp); 10099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 10109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 10119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 10129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsel; 10139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 10149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd; 10159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd_orig; 10169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* s = *srcp */ 10189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, srcp); 10199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, voverflow, valigner); 10209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vsel is set for items that match the key */ 10229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsel = (vector unsigned char)vec_and((vector unsigned int)vs, vrgbmask); 10239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsel = (vector unsigned char)vec_cmpeq((vector unsigned int)vsel, vckey); 10249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* permute to source format */ 10269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, valpha, vsrcPermute); 10279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* d = *dstp */ 10299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_ld(0, dstp); 10309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd_orig = vd = vec_perm(vd, v0, vsdstPermute); 10319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 10339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha channel to full on */ 10359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_or(vd, valphamask); 10369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* mask out color key */ 10389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_sel(vd, vd_orig, vsel); 10399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* permute to dest format */ 10419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_perm(vd, vbits, vdstPermute); 10429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* *dstp = res */ 10449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st((vector unsigned int)vd, 0, dstp); 10459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 10479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 10489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 4; 10499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = voverflow; 10509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 10519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 10529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 10539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 10549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 10569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 10579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 10589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 10599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit32to32PixelAlphaAltivec(SDL_BlitInfo *info) 10629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 10639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 10649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 10659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 10669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 10679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 10689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 10699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 10709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 10719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute; 10729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphaPermute; 10739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsrcPermute; 10749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdstPermute; 10759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsdstPermute; 10769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphamask; 10779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vpixelmask; 10789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char v0; 10799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1; 10809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8; 10819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v0 = vec_splat_u8(0); 10839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v1 = vec_splat_u16(1); 10849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v8 = vec_splat_u16(8); 10859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mergePermute = VEC_MERGE_PERMUTE(); 10869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphamask = VEC_ALPHA_MASK(); 10879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 10889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vpixelmask = vec_nor(valphamask, v0); 10899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrcPermute = calc_swizzle32(srcfmt, NULL); 10909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdstPermute = calc_swizzle32(NULL, dstfmt); 10919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsdstPermute = calc_swizzle32(dstfmt, NULL); 10929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 10939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 10949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width = info->d_width; 10959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 10969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; \ 10979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR, sG, sB, dR, dG, dB, sA, dA; \ 10989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \ 10999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA) { \ 11009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \ 11019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 11029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \ 11039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } \ 11049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; \ 11059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; \ 11069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 11079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 11089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 11099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (width > 0) { 11109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vsrcPermute */ 11119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* vdstPermute */ 11129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth = (width % 4); 11139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner = VEC_ALIGNER(srcp); 11149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 11159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 11169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 11179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 11189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd; 11199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 11209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdstalpha; 11219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* s = *srcp */ 11229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, srcp); 11239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, voverflow, valigner); 11249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, v0, vsrcPermute); 11259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_perm(vs, v0, valphaPermute); 11279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* d = *dstp */ 11299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_ld(0, dstp); 11309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_perm(vd, v0, vsdstPermute); 11319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdstalpha = vec_and(vd, valphamask); 11329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 11349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha to the dest alpha */ 11369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_and(vd, vpixelmask); 11379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_or(vd, vdstalpha); 11389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_perm(vd, v0, vdstPermute); 11399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* *dstp = res */ 11419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st((vector unsigned int)vd, 0, dstp); 11429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 11449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 11459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 4; 11469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = voverflow; 11479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 11499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 11509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 11519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 11529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 11539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 11549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 11559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 11569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 11589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo *info) 11599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 11609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 11619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 11629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 11639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 11649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 11659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 11669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute; 11679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphaPermute; 11689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphamask; 11699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vpixelmask; 11709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char v0; 11719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1; 11729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8; 11739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v0 = vec_splat_u8(0); 11749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v1 = vec_splat_u16(1); 11759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v8 = vec_splat_u16(8); 11769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mergePermute = VEC_MERGE_PERMUTE(); 11779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphamask = VEC_ALPHA_MASK(); 11789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphaPermute = vec_and(vec_lvsl(0, (int *)NULL), vec_splat_u8(0xC)); 11799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vpixelmask = vec_nor(valphamask, v0); 11829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 11839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width = info->d_width; 11849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) \ 11859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ((condition)) { \ 11869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dalpha; \ 11879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d; \ 11889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s1; \ 11899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d1; \ 11909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp; \ 11919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha = s >> 24; \ 11929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha) { \ 11939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == SDL_ALPHA_OPAQUE) { \ 11949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \ 11959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { \ 11969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; \ 11979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dalpha = d & 0xff000000; \ 11989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 = s & 0xff00ff; \ 11999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = d & 0xff00ff; \ 12009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ 12019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s &= 0xff00; \ 12029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0xff00; \ 12039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ 12049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = d1 | d | dalpha; \ 12059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } \ 12069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } \ 12079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; \ 12089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; \ 12099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 12109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 12119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 12129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (width > 0) { 12139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth = (width % 4); 12149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner = VEC_ALIGNER(srcp); 12159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 12169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 12179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 12189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 12199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd; 12209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 12219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdstalpha; 12229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* s = *srcp */ 12239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, srcp); 12249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, voverflow, valigner); 12259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_perm(vs, v0, valphaPermute); 12279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* d = *dstp */ 12299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_ld(0, dstp); 12309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdstalpha = vec_and(vd, valphamask); 12319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 12339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha to the dest alpha */ 12359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_and(vd, vpixelmask); 12369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_or(vd, vdstalpha); 12379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* *dstp = res */ 12399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st((vector unsigned int)vd, 0, dstp); 12409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 12429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 12439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 4; 12449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = voverflow; 12459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 12469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 12479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 12489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 12499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 12509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 12519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 12529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 12539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo *info) 12559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 12569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* XXX : 6 */ 12579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 12589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 12599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 12609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 12619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 12629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 12639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 12649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 12659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA = srcfmt->alpha; 12669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 12679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute; 12689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsrcPermute; 12699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vdstPermute; 12709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vsdstPermute; 12719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 12729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphamask; 12739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vbits; 12749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1; 12759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8; 12769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mergePermute = VEC_MERGE_PERMUTE(); 12789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v1 = vec_splat_u16(1); 12799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v8 = vec_splat_u16(8); 12809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha to 255 on the destination surf */ 12829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphamask = VEC_ALPHA_MASK(); 12839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsrcPermute = calc_swizzle32(srcfmt, NULL); 12859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vdstPermute = calc_swizzle32(NULL, dstfmt); 12869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vsdstPermute = calc_swizzle32(dstfmt, NULL); 12879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set a vector full of alpha and 255-alpha */ 12899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned char *)&valpha)[0] = alpha; 12909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_splat(valpha, 0); 12919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vbits = (vector unsigned char)vec_splat_s8(-1); 12929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 12939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 12949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 12959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 12969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; \ 12979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR, sG, sB, dR, dG, dB; \ 12989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \ 12999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \ 13009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \ 13019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \ 13029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; \ 13039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; \ 13049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 13059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 13069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 13079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (width > 0) { 13089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth = (width % 4); 13099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner = VEC_ALIGNER(srcp); 13109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 13119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 13129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 13139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 13149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd; 13159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* s = *srcp */ 13179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, srcp); 13189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, voverflow, valigner); 13199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, valpha, vsrcPermute); 13209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* d = *dstp */ 13229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_ld(0, dstp); 13239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_perm(vd, vd, vsdstPermute); 13249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 13269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha channel to full on */ 13289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_or(vd, valphamask); 13299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_perm(vd, vbits, vdstPermute); 13309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* *dstp = res */ 13329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st((vector unsigned int)vd, 0, dstp); 13339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 13359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 13369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 4; 13379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = voverflow; 13389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 13399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 13409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 13419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 13429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 13449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 13459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 13469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 13489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending */ 13519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo *info) 13529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 13539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 13549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 13559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 13569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 13579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 13589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 13599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char mergePermute; 13609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valpha; 13619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valphamask; 13629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v1; 13639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned short v8; 13649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mergePermute = VEC_MERGE_PERMUTE(); 13669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v1 = vec_splat_u16(1); 13679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall v8 = vec_splat_u16(8); 13689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha to 255 on the destination surf */ 13709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valphamask = VEC_ALPHA_MASK(); 13719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set a vector full of alpha and 255-alpha */ 13739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((unsigned char *)&valpha)[0] = alpha; 13749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall valpha = vec_splat(valpha, 0); 13759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 13769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 13779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 13789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \ 13799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp; \ 13809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; \ 13819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s1 = s & 0xff00ff; \ 13829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d1 = d & 0xff00ff; \ 13839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = (d1 + ((s1 - d1) * alpha >> 8)) \ 13849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall & 0xff00ff; \ 13859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s &= 0xff00; \ 13869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0xff00; \ 13879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ 13889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = d1 | d | 0xff000000; \ 13899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; \ 13909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; \ 13919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall widthvar--; \ 13929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 13939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width); 13949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (width > 0) { 13959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int extrawidth = (width % 4); 13969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char valigner = VEC_ALIGNER(srcp); 13979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vs = (vector unsigned char)vec_ld(0, srcp); 13989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= extrawidth; 13999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while (width) { 14009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char voverflow; 14019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vector unsigned char vd; 14029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* s = *srcp */ 14049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall voverflow = (vector unsigned char)vec_ld(15, srcp); 14059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = vec_perm(vs, voverflow, valigner); 14069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* d = *dstp */ 14089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = (vector unsigned char)vec_ld(0, dstp); 14099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8); 14119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* set the alpha channel to full on */ 14139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vd = vec_or(vd, valphamask); 14149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* *dstp = res */ 14169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vec_st((vector unsigned int)vd, 0, dstp); 14179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 14199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 14209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width -= 4; 14219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall vs = voverflow; 14229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 14239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ONE_PIXEL_BLEND((extrawidth), extrawidth); 14249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 14259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ONE_PIXEL_BLEND 14269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 14289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 14299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 14309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 14319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if __MWERKS__ 14329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#pragma altivec_model off 14339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 14349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif /* SDL_ALTIVEC_BLITTERS */ 14359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */ 14379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) 14389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 14399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 14409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 14419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 14429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 14439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 14449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 14459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 14479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 14489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp++; 14499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 14509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) 14519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s & d & 0x00010101)) | 0xff000000; 14529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 14539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 14549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 14559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 14569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 14579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB888->(A)RGB888 blending with surface alpha */ 14599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) 14609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 14619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 14629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 14639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall BlitRGBtoRGBSurfaceAlpha128(info); 14649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 14659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 14669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 14679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 14689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 14699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 14709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 14719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s; 14729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d; 14739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s1; 14749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d1; 14759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 14769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 14779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_DOUBLE2({ 14789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* One Pixel Blend */ 14799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp; 14809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 14819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 = s & 0xff00ff; 14829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = d & 0xff00ff; 14839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = (d1 + ((s1 - d1) * alpha >> 8)) 14849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall & 0xff00ff; 14859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s &= 0xff00; 14869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0xff00; 14879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d + ((s - d) * alpha >> 8)) & 0xff00; 14889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = d1 | d | 0xff000000; 14899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 14909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 14919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 14929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Two Pixels Blend */ 14939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp; 14949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 14959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 = s & 0xff00ff; 14969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = d & 0xff00ff; 14979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 += (s1 - d1) * alpha >> 8; 14989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 &= 0xff00ff; 14999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = ((s & 0xff00) >> 8) | 15019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((srcp[1] & 0xff00) << 8); 15029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = ((d & 0xff00) >> 8) | 15039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((dstp[1] & 0xff00) << 8); 15049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 8; 15059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x00ff00ff; 15069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d1 | ((d << 8) & 0xff00) | 0xff000000; 15089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 15099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 = *srcp; 15119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = *dstp; 15129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 &= 0xff00ff; 15139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 &= 0xff00ff; 15149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 += (s1 - d1) * alpha >> 8; 15159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 &= 0xff00ff; 15169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = d1 | ((d >> 8) & 0xff00) | 0xff000000; 15189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 15199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 15209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 15219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 15229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 15239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 15249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 15259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 15269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB888->(A)RGB888 blending with pixel alpha */ 15289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info) 15299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 15309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 15319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 15329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 15339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 15349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 15359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 15369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 15389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 15399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dalpha; 15409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d; 15419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s1; 15429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d1; 15439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp; 15449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha = s >> 24; 15459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: Here we special-case opaque alpha since the 15469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall compositioning used (>>8 instead of /255) doesn't handle 15479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall it correctly. Also special-case alpha=0 for speed? 15489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Benchmark this! */ 15499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha) { 15509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == SDL_ALPHA_OPAQUE) { 15519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); 15529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 15539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 15549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * take out the middle component (green), and process 15559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the other two in parallel. One multiply less. 15569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 15579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 15589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dalpha = d & 0xff000000; 15599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s1 = s & 0xff00ff; 15609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = d & 0xff00ff; 15619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; 15629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s &= 0xff00; 15639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0xff00; 15649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d + ((s - d) * alpha >> 8)) & 0xff00; 15659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = d1 | d | dalpha; 15669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 15679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 15689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 15699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 15709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 15719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 15729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 15739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 15749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 15759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if GCC_ASMBLIT 15779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 15789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 15799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 15809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 15819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 15829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 15839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 15849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 15859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 15869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* sf = info->src; 15879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amask = sf->Amask; 15889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ ( 15909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* make mm6 all zeros. */ 15919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pxor %%mm6, %%mm6\n" 15929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 15939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Make a mask to preserve the alpha. */ 15949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %0, %%mm7\n\t" /* 0000F000 -> mm7 */ 15959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "punpcklbw %%mm7, %%mm7\n\t" /* FF000000 -> mm7 */ 15969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pcmpeqb %%mm4, %%mm4\n\t" /* FFFFFFFF -> mm4 */ 15979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movq %%mm4, %%mm3\n\t" /* FFFFFFFF -> mm3 (for later) */ 15989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pxor %%mm4, %%mm7\n\t" /* 00FFFFFF -> mm7 (mult mask) */ 15999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* form channel masks */ 16019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movq %%mm7, %%mm4\n\t" /* 00FFFFFF -> mm4 */ 16029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "packsswb %%mm6, %%mm4\n\t" /* 00000FFF -> mm4 (channel mask) */ 16039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "packsswb %%mm6, %%mm3\n\t" /* 0000FFFF -> mm3 */ 16049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pxor %%mm4, %%mm3\n\t" /* 0000F000 -> mm3 (~channel mask) */ 16059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* get alpha channel shift */ 16079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %1, %%mm5\n\t" /* Ashift -> mm5 */ 16089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : /* nothing */ : "rm" (amask), "rm" ((Uint32) sf->Ashift) ); 16109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 16129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 16149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha; 16159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ ( 16179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "prefetch 64(%0)\n" 16189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "prefetch 64(%1)\n" 16199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : : "r" (srcp), "r" (dstp) ); 16209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha = *srcp & amask; 16229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: Here we special-case opaque alpha since the 16239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall compositioning used (>>8 instead of /255) doesn't handle 16249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall it correctly. Also special-case alpha=0 for speed? 16259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Benchmark this! */ 16269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 0) { 16279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* do nothing */ 16289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 16299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else if(alpha == amask) { 16309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* opaque alpha -- copy RGB, keep dst alpha */ 16319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* using MMX here to free up regular registers for other things */ 16329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ ( 16339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd (%0), %%mm0\n\t" /* src(ARGB) -> mm0 (0000ARGB)*/ 16349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd (%1), %%mm1\n\t" /* dst(ARGB) -> mm1 (0000ARGB)*/ 16359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pand %%mm4, %%mm0\n\t" /* src & chanmask -> mm0 */ 16369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pand %%mm3, %%mm1\n\t" /* dst & ~chanmask -> mm2 */ 16379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "por %%mm0, %%mm1\n\t" /* src | dst -> mm1 */ 16389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %%mm1, (%1) \n\t" /* mm1 -> dst */ 16399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : : "r" (srcp), "r" (dstp) ); 16419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 16429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else { 16449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ ( 16459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* load in the source, and dst. */ 16469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd (%0), %%mm0\n" /* mm0(s) = 0 0 0 0 | As Rs Gs Bs */ 16479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd (%1), %%mm1\n" /* mm1(d) = 0 0 0 0 | Ad Rd Gd Bd */ 16489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Move the src alpha into mm2 */ 16509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* if supporting pshufw */ 16529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /*"pshufw $0x55, %%mm0, %%mm2\n" */ /* mm2 = 0 As 0 As | 0 As 0 As */ 16539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /*"psrlw $8, %%mm2\n" */ 16549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* else: */ 16569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %2, %%mm2\n" 16579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "psrld %%mm5, %%mm2\n" /* mm2 = 0 0 0 0 | 0 0 0 As */ 16589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "punpcklwd %%mm2, %%mm2\n" /* mm2 = 0 0 0 0 | 0 As 0 As */ 16599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "punpckldq %%mm2, %%mm2\n" /* mm2 = 0 As 0 As | 0 As 0 As */ 16609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pand %%mm7, %%mm2\n" /* to preserve dest alpha */ 16619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* move the colors into words. */ 16639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "punpcklbw %%mm6, %%mm0\n" /* mm0 = 0 As 0 Rs | 0 Gs 0 Bs */ 16649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "punpcklbw %%mm6, %%mm1\n" /* mm0 = 0 Ad 0 Rd | 0 Gd 0 Bd */ 16659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* src - dst */ 16679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "psubw %%mm1, %%mm0\n" /* mm0 = As-Ad Rs-Rd | Gs-Gd Bs-Bd */ 16689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* A * (src-dst) */ 16709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "pmullw %%mm2, %%mm0\n" /* mm0 = 0*As-d As*Rs-d | As*Gs-d As*Bs-d */ 16719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "psrlw $8, %%mm0\n" /* mm0 = 0>>8 Rc>>8 | Gc>>8 Bc>>8 */ 16729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "paddb %%mm1, %%mm0\n" /* mm0 = 0+Ad Rc+Rd | Gc+Gd Bc+Bd */ 16739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "packuswb %%mm0, %%mm0\n" /* mm0 = | Ac Rc Gc Bc */ 16759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "movd %%mm0, (%1)\n" /* result in mm0 */ 16779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : : "r" (srcp), "r" (dstp), "r" (alpha) ); 16799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 16819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 16829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 16839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 16849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 16859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 16869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 16879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __asm__ ( 16899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall "emms\n" 16909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall : ); 16919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 16929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* End GCC_ASMBLIT*/ 16939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 16949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#elif MSVC_ASMBLIT 16959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */ 16969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo *info) 16979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 16989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 16999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 17009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 17019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 17029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *dstp = (Uint32 *)info->d_pixels; 17039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 2; 17049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat* sf = info->src; 17059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask; 17069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 amask = sf->Amask; 17079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 ashift = sf->Ashift; 17089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint64 multmask; 17099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, dst1, mm_alpha, mm_zero, dmask; 17119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */ 17139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall multmask = ~(0xFFFFi64 << (ashift * 2)); 17149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dmask = *(__m64*) &multmask; /* dst alpha mask -> dmask */ 17159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 17179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 17189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 alpha; 17199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _m_prefetch(srcp + 16); 17219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _m_prefetch(dstp + 16); 17229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha = *srcp & amask; 17249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (alpha == 0) { 17259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* do nothing */ 17269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else if (alpha == amask) { 17279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* copy RGB, keep dst alpha */ 17289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (*srcp & chanmask) | (*dstp & ~chanmask); 17299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 17309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/ 17319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */ 17329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/ 17349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */ 17359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */ 17379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */ 17389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 17399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 17409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */ 17419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 17439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */ 17449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */ 17459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */ 17469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */ 17479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */ 17489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */ 17509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 17519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++srcp; 17529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ++dstp; 17539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 17549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 17559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 17569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 17579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 17589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 17599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* End MSVC_ASMBLIT */ 17609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 17629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */ 17649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* blend a single 16 bit pixel at 50% */ 17669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define BLEND16_50(d, s, mask) \ 17679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ((((s & mask) + (d & mask)) >> 1) + (s & d & (~mask & 0xffff))) 17689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* blend two 16 bit pixels at 50% */ 17709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define BLEND2x16_50(d, s, mask) \ 17719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (((s & (mask | mask << 16)) >> 1) + ((d & (mask | mask << 16)) >> 1) \ 17729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s & d & (~(mask | mask << 16)))) 17739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask) 17759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 17769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 17779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 17789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 17799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 17809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 17819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 17829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 17849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(((uintptr_t)srcp ^ (uintptr_t)dstp) & 2) { 17859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 17869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * Source and destination not aligned, pipeline it. 17879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * This is mostly a win for big blits but no loss for 17889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * small ones 17899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 17909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 prev_sw; 17919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int w = width; 17929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 17939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* handle odd destination */ 17949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if((uintptr_t)dstp & 2) { 17959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 d = *dstp, s = *srcp; 17969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = BLEND16_50(d, s, mask); 17979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 17989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 17999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall w--; 18009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; /* srcp is now 32-bit aligned */ 18029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* bootstrap pipeline with first halfword */ 18049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall prev_sw = ((Uint32 *)srcp)[-1]; 18059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(w > 1) { 18079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 sw, dw, s; 18089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall sw = *(Uint32 *)srcp; 18099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dw = *(Uint32 *)dstp; 18109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_BYTEORDER == SDL_BIG_ENDIAN 18119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (prev_sw << 16) + (sw >> 16); 18129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else 18139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (prev_sw >> 16) + (sw << 16); 18149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 18159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall prev_sw = sw; 18169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(Uint32 *)dstp = BLEND2x16_50(dw, s, mask); 18179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 18189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 18199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall w -= 2; 18209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* final pixel if any */ 18239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(w) { 18249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 d = *dstp, s; 18259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_BYTEORDER == SDL_BIG_ENDIAN 18269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (Uint16)prev_sw; 18279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else 18289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (Uint16)(prev_sw >> 16); 18299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 18309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = BLEND16_50(d, s, mask); 18319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 18329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 18339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip - 1; 18359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 18369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 18379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* source and destination are aligned */ 18389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int w = width; 18399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* first odd pixel? */ 18419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if((uintptr_t)srcp & 2) { 18429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 d = *dstp, s = *srcp; 18439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = BLEND16_50(d, s, mask); 18449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 18459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 18469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall w--; 18479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* srcp and dstp are now 32-bit aligned */ 18499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(w > 1) { 18519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 sw = *(Uint32 *)srcp; 18529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 dw = *(Uint32 *)dstp; 18539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(Uint32 *)dstp = BLEND2x16_50(dw, sw, mask); 18549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 2; 18559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 2; 18569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall w -= 2; 18579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* last odd pixel? */ 18609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(w) { 18619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 d = *dstp, s = *srcp; 18629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = BLEND16_50(d, s, mask); 18639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 18649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 18659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 18679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 18689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 18709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 18719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if GCC_ASMBLIT 18739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB565->RGB565 blending with surface alpha */ 18749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 18759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 18769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 18779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 18789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xf7de); 18799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 18809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 18819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 18829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 18839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 18849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 18859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 18869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s, d; 18879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint64 load; 18889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 18909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = alpha; 18919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 18929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 18939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm0); /* alpha(0000000A) -> mm0 */ 18949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 18959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 18969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* position alpha to allow for mullo and mulhi on diff channels 18979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall to reduce the number of operations */ 18989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllq_i2r(3, mm0); 18999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Setup the 565 color channel masks */ 19019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x07E007E007E007E0ULL; 19029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm4); /* MASKGREEN -> mm4 */ 19039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x001F001F001F001FULL; 19049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm7); /* MASKBLUE -> mm7 */ 19059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 19069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_QUATRO2( 19079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 19089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 19099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 19109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 19119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 19129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 19139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 19149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 19159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 19169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 19179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 19189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 19199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 19209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 19219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 19229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 19239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 19249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 19259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 19269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 19279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 19289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 19299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 19309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 19319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 19329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 19339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 19349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 19359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 19369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 19379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 19389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 19399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 19409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 19419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 19429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 19439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 19449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 19459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 19469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ 19479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ 19489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* red -- does not need a mask since the right shift clears 19509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall the uninteresting bits */ 19519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 19529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 19539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 [000r 000r 000r 000r] */ 19549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(11, mm6); /* mm6 >> 11 -> mm6 [000r 000r 000r 000r] */ 19559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 19579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 19589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 19599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* alpha used is actually 11 bits 19609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 11 + 5 = 16 bits, so the sign bits are lost */ 19619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ 19629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 19639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllw_i2r(11, mm6); /* mm6 << 11 -> mm6 */ 19649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm6, mm1); /* save new reds in dsts */ 19669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* green -- process the bits in place */ 19689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 19699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 19709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ 19719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ 19729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 19749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 19759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 19769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 11 + 11 - 16 = 6 bits, so all the lower uninteresting 19779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bits are gone and the sign bits present */ 19789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ 19799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 19809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm6, mm1); /* save new greens in dsts */ 19829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blue */ 19849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 19859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 19869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ 19879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ 19889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 19909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 19919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 19929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 11 + 5 = 16 bits, so the sign bits are lost and 19939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall the interesting bits will need to be MASKed */ 19949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ 19959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 19969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ 19979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 19989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm6, mm1); /* save new blues in dsts */ 19999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2m(mm1, *dstp); /* mm1 -> 4 dst pixels */ 20019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 20039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 20049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 20059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 20069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 20079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 20089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall emms(); 20099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 20109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 20119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB555->RGB555 blending with surface alpha */ 20139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 20149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 20159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 20169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 20179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xfbde); 20189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 20199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 20209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 20219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 20229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 20239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 20249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 20259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s, d; 20269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint64 load; 20279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 20299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = alpha; 20309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 20319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm0); /* alpha(0000000A) -> mm0 */ 20339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm0, mm0); /* 00000A0A -> mm0 */ 20349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall punpcklwd_r2r(mm0, mm0); /* 0A0A0A0A -> mm0 */ 20359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* position alpha to allow for mullo and mulhi on diff channels 20369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall to reduce the number of operations */ 20379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllq_i2r(3, mm0); 20389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Setup the 555 color channel masks */ 20409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x03E003E003E003E0ULL; 20419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm4); /* MASKGREEN -> mm4 */ 20429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall load = 0x001F001F001F001FULL; 20439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r(load, mm7); /* MASKBLUE -> mm7 */ 20449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 20459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_QUATRO2( 20469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 20479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 20489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 20499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 20509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 20519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 20529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 20539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 20549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 20559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 20569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 20579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 20589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 20599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 20609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 20619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 20629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 20639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 20649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 20659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 20669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 20679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 20689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 20699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 20709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 20719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 20729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 20739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 20749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 20759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 20769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 20779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 20789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 20799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 20809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 20819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 20829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 20839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = d | d >> 16; 20849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 20859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*srcp), mm2);/* 4 src pixels -> mm2 */ 20869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_m2r((*dstp), mm3);/* 4 dst pixels -> mm3 */ 20879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* red -- process the bits in place */ 20899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllq_i2r(5, mm4); /* turn MASKGREEN into MASKRED */ 20909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* by reusing the GREEN mask we free up another mmx 20919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall register to accumulate the result */ 20929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 20949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 20959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm5); /* src & MASKRED -> mm5 */ 20969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm6); /* dst & MASKRED -> mm6 */ 20979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 20989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 20999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 21009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 21019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 11 + 15 - 16 = 10 bits, uninteresting bits will be 21029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall cleared by a MASK below */ 21039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ 21049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 21059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm6); /* mm6 & MASKRED -> mm6 */ 21069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlq_i2r(5, mm4); /* turn MASKRED back into MASKGREEN */ 21089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm6, mm1); /* save new reds in dsts */ 21109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* green -- process the bits in place */ 21129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 21139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 21149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm5); /* src & MASKGREEN -> mm5 */ 21159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm4, mm6); /* dst & MASKGREEN -> mm6 */ 21169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 21189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 21199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmulhw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 21209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 11 + 10 - 16 = 5 bits, so all the lower uninteresting 21219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bits are gone and the sign bits present */ 21229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psllw_i2r(5, mm5); /* mm5 << 5 -> mm5 */ 21239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 21249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm6, mm1); /* save new greens in dsts */ 21269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blue */ 21289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm2, mm5); /* src -> mm5 */ 21299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2r(mm3, mm6); /* dst -> mm6 */ 21309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm5); /* src & MASKBLUE -> mm5[000b 000b 000b 000b] */ 21319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm6); /* dst & MASKBLUE -> mm6[000b 000b 000b 000b] */ 21329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 21349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psubw_r2r(mm6, mm5);/* src - dst -> mm5 */ 21359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pmullw_r2r(mm0, mm5); /* mm5 * alpha -> mm5 */ 21369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 11 + 5 = 16 bits, so the sign bits are lost and 21379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall the interesting bits will need to be MASKed */ 21389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall psrlw_i2r(11, mm5); /* mm5 >> 11 -> mm5 */ 21399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall paddw_r2r(mm5, mm6); /* mm5 + mm6(dst) -> mm6 */ 21409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall pand_r2r(mm7, mm6); /* mm6 & MASKBLUE -> mm6[000b 000b 000b 000b] */ 21419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall por_r2r(mm6, mm1); /* save new blues in dsts */ 21439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall movq_r2m(mm1, *dstp);/* mm1 -> 4 dst pixels */ 21459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 21479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 21489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 21499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 21509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 21519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 21529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall emms(); 21539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 21549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 21559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* End GCC_ASMBLIT */ 21569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#elif MSVC_ASMBLIT 21589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB565->RGB565 blending with surface alpha */ 21599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info) 21609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 21619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 21629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 21639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xf7de); 21649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 21659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 21669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 21679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 21689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 21699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 21709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 21719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s, d; 21729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha; 21749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 21769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 21779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 21789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 21809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 21819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* position alpha to allow for mullo and mulhi on diff channels 21829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall to reduce the number of operations */ 21839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_slli_si64(mm_alpha, 3); 21849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Setup the 565 color channel masks */ 21869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */ 21879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 21889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 21899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 21909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_QUATRO2( 21919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 21929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 21939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 21949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 21959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 21969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 21979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 21989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 21999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 22009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 22019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 22029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 22039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 22049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 22059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 22069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 22079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 22089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 22099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 22109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 22119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 22129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 22139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 22149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 22159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 22169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 22179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 22189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 22199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 22209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 22219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 22229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 22239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 22249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 22259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 22269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 22279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 22289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 22299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 22309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ 22319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ 22329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* red */ 22349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; 22359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */ 22369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; 22389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */ 22399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 22419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 22429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 22439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 22449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 22459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */ 22469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = dst2; /* RED -> mm_res */ 22489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* green -- process the bits in place */ 22509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; 22519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ 22529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; 22549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ 22559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 22579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 22589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 22599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 22609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 22619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ 22639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blue */ 22659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; 22669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ 22679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; 22699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ 22709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 22729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 22739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 22749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 22759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 22769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ 22779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ 22799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 22819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 22839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 22849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 22859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 22869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 22879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 22889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 22899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 22909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 22919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 22929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB555->RGB555 blending with surface alpha */ 22939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info) 22949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 22959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 22969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 22979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xfbde); 22989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 22999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 23009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 23019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 23029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 23039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 23049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 23059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s, d; 23069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha; 23089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha &= ~(1+2+4); /* cut alpha to get the exact same behaviour */ 23109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */ 23119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 23129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */ 23149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */ 23159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* position alpha to allow for mullo and mulhi on diff channels 23169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall to reduce the number of operations */ 23179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_alpha = _mm_slli_si64(mm_alpha, 3); 23189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Setup the 555 color channel masks */ 23209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */ 23219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */ 23229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */ 23239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 23259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP_QUATRO2( 23269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 23279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 23289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 23299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 23309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 23319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 23329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 23339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 23349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 23359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 23369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 23379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 23389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 23399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 23409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 23419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 23429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 23439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 23449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 23459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 23469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 23479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 23489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 23499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 23509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 23519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 23529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = *srcp++; 23539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = *dstp; 23549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 23559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 23569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 23579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 23589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 23599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 23609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 23619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 23629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 23639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 23649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall },{ 23659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */ 23669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */ 23679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* red -- process the bits in place */ 23699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; 23709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */ 23719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; 23739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */ 23749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 23769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 23779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 23789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 23799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 23809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */ 23819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = dst2; /* RED -> mm_res */ 23839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* green -- process the bits in place */ 23859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; 23869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */ 23879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; 23899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */ 23909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 23929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 23939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 23949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */ 23959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 23969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */ 23989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 23999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blue */ 24009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = src1; /* src -> src2 */ 24019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */ 24029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = dst1; /* dst -> dst2 */ 24049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */ 24059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* blend */ 24079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */ 24089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */ 24099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */ 24109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */ 24119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */ 24129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */ 24149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */ 24169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += 4; 24189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += 4; 24199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 24209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 24219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 24229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall _mm_empty(); 24249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 24269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif /* GCC_ASMBLIT, MSVC_ASMBLIT */ 24279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB565->RGB565 blending with surface alpha */ 24299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) 24309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 24319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; 24329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 24339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xf7de); 24349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 24359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 24369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 24379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 24389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 24399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 24409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 24419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 24429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 24449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 24459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp++; 24469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 24479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 24489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 24499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 24509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 24519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 24529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x07e0f81f; 24539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 24549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 24559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 24569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 24579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 24589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 24599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 24609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 24639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast RGB555->RGB555 blending with surface alpha */ 24659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) 24669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 24679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = info->src->alpha; /* downscale alpha to 5 bits */ 24689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == 128) { 24699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Blit16to16SurfaceAlpha128(info, 0xfbde); 24709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 24719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 24729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 24739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *srcp = (Uint16 *)info->s_pixels; 24749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 1; 24759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 24769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 24779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha >>= 3; /* downscale alpha to 5 bits */ 24789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 24799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 24809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 24819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp++; 24829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 24839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 24849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * shift out the middle component (green) to 24859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * the high 16 bits, and process all three RGB 24869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * components at the same time. 24879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 24889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = (s | s << 16) & 0x03e07c1f; 24899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 24909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 24919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 24929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp++ = (Uint16)(d | d >> 16); 24939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 24949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 24959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 24969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 24989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 24999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 25009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB8888->RGB565 blending with pixel alpha */ 25019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) 25029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 25039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 25049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 25059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 25069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 25079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 25089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 25099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 25109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 25119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 25129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp; 25139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ 25149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: Here we special-case opaque alpha since the 25159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall compositioning used (>>8 instead of /255) doesn't handle 25169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall it correctly. Also special-case alpha=0 for speed? 25179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Benchmark this! */ 25189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha) { 25199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { 25209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f)); 25219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 25229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 25239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 25249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * convert source and destination to G0RAB65565 25259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * and blend all components at the same time 25269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 25279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800) 25289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s >> 3 & 0x1f); 25299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x07e0f81f; 25309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 25319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x07e0f81f; 25329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (Uint16)(d | d >> 16); 25339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 25369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 25379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 25389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 25399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 25409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 25429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 25439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* fast ARGB8888->RGB555 blending with pixel alpha */ 25449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) 25459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 25469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 25479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 25489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 *srcp = (Uint32 *)info->s_pixels; 25499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip >> 2; 25509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint16 *dstp = (Uint16 *)info->d_pixels; 25519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip >> 1; 25529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 25539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while(height--) { 25549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4({ 25559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned alpha; 25569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 s = *srcp; 25579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall alpha = s >> 27; /* downscale alpha to 5 bits */ 25589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: Here we special-case opaque alpha since the 25599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall compositioning used (>>8 instead of /255) doesn't handle 25609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall it correctly. Also special-case alpha=0 for speed? 25619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Benchmark this! */ 25629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha) { 25639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(alpha == (SDL_ALPHA_OPAQUE >> 3)) { 25649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f)); 25659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 25669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 d = *dstp; 25679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* 25689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * convert source and destination to G0RAB65565 25699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * and blend all components at the same time 25709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */ 25719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00) 25729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall + (s >> 3 & 0x1f); 25739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d = (d | d << 16) & 0x03e07c1f; 25749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d += (s - d) * alpha >> 5; 25759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall d &= 0x03e07c1f; 25769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall *dstp = (Uint16)(d | d >> 16); 25779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp++; 25809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp++; 25819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, width); 25829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcp += srcskip; 25839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstp += dstskip; 25849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 25859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 25869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 25879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* General (slow) N->N blending with per-surface alpha */ 25889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) 25899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 25909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 25919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 25929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 25939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 25949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 25959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 25969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 25979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 25989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp = srcfmt->BytesPerPixel; 25999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstbpp = dstfmt->BytesPerPixel; 26009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA = srcfmt->alpha; 26019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 26029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA) { 26049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 26059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4( 26069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 26079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 26089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 26099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 26109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 26119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 26129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 26139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 26149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); 26159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); 26169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 26179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 26189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 26199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstbpp; 26209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 26219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 26229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 26239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 26249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 26259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 26269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 26279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* General (slow) colorkeyed N->N blending with per-surface alpha */ 26299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) 26309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 26319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 26329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 26339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 26349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 26359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 26369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 26379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 26389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 26399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 ckey = srcfmt->colorkey; 26409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp = srcfmt->BytesPerPixel; 26419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstbpp = dstfmt->BytesPerPixel; 26429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA = srcfmt->alpha; 26439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0; 26449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 26469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4( 26479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 26489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 26499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 26509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 26519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 26529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 26539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 26549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 26559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); 26569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA && Pixel != ckey) { 26579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); 26589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGB(dst, dstbpp, dstfmt, Pixel, dR, dG, dB); 26599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 26609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 26619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 26629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 26639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstbpp; 26649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 26659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 26669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 26679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 26689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 26699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 26709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* General (slow) N->N blending with pixel alpha */ 26729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void BlitNtoNPixelAlpha(SDL_BlitInfo *info) 26739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 26749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int width = info->d_width; 26759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int height = info->d_height; 26769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *src = info->s_pixels; 26779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcskip = info->s_skip; 26789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint8 *dst = info->d_pixels; 26799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstskip = info->d_skip; 26809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *srcfmt = info->src; 26819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *dstfmt = info->dst; 26829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int srcbpp; 26849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall int dstbpp; 26859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Set up some basic variables */ 26879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall srcbpp = srcfmt->BytesPerPixel; 26889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dstbpp = dstfmt->BytesPerPixel; 26899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 26909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* FIXME: for 8bpp source alpha, this doesn't get opaque values 26919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall quite right. for <8bpp source alpha, it gets them very wrong 26929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall (check all macros!) 26939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall It is unclear whether there is a good general solution that doesn't 26949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall need a branch (or a divide). */ 26959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall while ( height-- ) { 26969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DUFFS_LOOP4( 26979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 26989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall Uint32 Pixel; 26999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sR; 27009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sG; 27019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sB; 27029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dR; 27039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dG; 27049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dB; 27059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned sA; 27069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall unsigned dA; 27079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); 27089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sA) { 27099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); 27109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); 27119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA); 27129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcbpp; 27149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstbpp; 27159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall }, 27169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall width); 27179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall src += srcskip; 27189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall dst += dstskip; 27199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 27219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 27229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 27239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse HallSDL_loblit SDL_CalculateAlphaBlit(SDL_Surface *surface, int blit_index) 27249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{ 27259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *sf = surface->format; 27269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall SDL_PixelFormat *df = surface->map->dst->format; 27279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 27289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Amask == 0) { 27299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 27309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(df->BytesPerPixel == 1) 27319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNto1SurfaceAlphaKey; 27329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 27339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 27349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 && 27359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 27369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit32to32SurfaceAlphaKeyAltivec; 27379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 27389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNSurfaceAlphaKey; 27409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 27419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Per-surface alpha blits */ 27429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall switch(df->BytesPerPixel) { 27439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 1: 27449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNto1SurfaceAlpha; 27459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 27469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 2: 27479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(surface->map->identity) { 27489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(df->Gmask == 0x7e0) 27499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 27509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if MMX_ASMBLIT 27519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(SDL_HasMMX()) 27529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit565to565SurfaceAlphaMMX; 27539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 27549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit565to565SurfaceAlpha; 27569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else if(df->Gmask == 0x3e0) 27589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 27599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if MMX_ASMBLIT 27609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(SDL_HasMMX()) 27619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit555to555SurfaceAlphaMMX; 27629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 27639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit555to555SurfaceAlpha; 27659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNSurfaceAlpha; 27689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 27699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 4: 27709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Rmask == df->Rmask 27719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Gmask == df->Gmask 27729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Bmask == df->Bmask 27739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->BytesPerPixel == 4) 27749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 27759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if MMX_ASMBLIT 27769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Rshift % 8 == 0 27779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Gshift % 8 == 0 27789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Bshift % 8 == 0 27799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && SDL_HasMMX()) 27809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBSurfaceAlphaMMX; 27819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) 27839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 27849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 27859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(!(surface->map->dst->flags & SDL_HWSURFACE) 27869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && SDL_HasAltiVec()) 27879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBSurfaceAlphaAltivec; 27889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBSurfaceAlpha; 27909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 27929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 27939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if((sf->BytesPerPixel == 4) && 27949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 27959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit32to32SurfaceAlphaAltivec; 27969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 27979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 27989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNSurfaceAlpha; 27999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 28009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 3: 28019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall default: 28029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNSurfaceAlpha; 28039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } else { 28069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall /* Per-pixel alpha blits */ 28079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall switch(df->BytesPerPixel) { 28089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 1: 28099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNto1PixelAlpha; 28109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 28119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 2: 28129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 28139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->BytesPerPixel == 4 && !(surface->map->dst->flags & SDL_HWSURFACE) && 28149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall df->Gmask == 0x7e0 && 28159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall df->Bmask == 0x1f && SDL_HasAltiVec()) 28169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit32to565PixelAlphaAltivec; 28179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 28189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 28199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->BytesPerPixel == 4 && sf->Amask == 0xff000000 28209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Gmask == 0xff00 28219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && ((sf->Rmask == 0xff && df->Rmask == 0x1f) 28229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall || (sf->Bmask == 0xff && df->Bmask == 0x1f))) { 28239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(df->Gmask == 0x7e0) 28249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitARGBto565PixelAlpha; 28259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else if(df->Gmask == 0x3e0) 28269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitARGBto555PixelAlpha; 28279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNPixelAlpha; 28299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 28309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 4: 28319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Rmask == df->Rmask 28329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Gmask == df->Gmask 28339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Bmask == df->Bmask 28349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->BytesPerPixel == 4) 28359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 28369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if MMX_ASMBLIT 28379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Rshift % 8 == 0 28389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Gshift % 8 == 0 28399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Bshift % 8 == 0 28409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Ashift % 8 == 0 28419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && sf->Aloss == 0) 28429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 28439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(SDL_Has3DNow()) 28449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBPixelAlphaMMX3DNOW; 28459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(SDL_HasMMX()) 28469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBPixelAlphaMMX; 28479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 28499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(sf->Amask == 0xff000000) 28509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall { 28519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 28529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if(!(surface->map->dst->flags & SDL_HWSURFACE) 28539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall && SDL_HasAltiVec()) 28549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBPixelAlphaAltivec; 28559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 28569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitRGBtoRGBPixelAlpha; 28579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_ALTIVEC_BLITTERS 28609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall if (sf->Amask && sf->BytesPerPixel == 4 && 28619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall !(surface->map->dst->flags & SDL_HWSURFACE) && SDL_HasAltiVec()) 28629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return Blit32to32PixelAlphaAltivec; 28639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall else 28649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif 28659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNPixelAlpha; 28669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 28679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall case 3: 28689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall default: 28699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall return BlitNtoNPixelAlpha; 28709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall } 28729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} 28739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall 2874