/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2012 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken@libsdl.org
*/
229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_config.h"
239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * RLE encoding for software colorkey and alpha-channel acceleration
 *
 * Original version by Sam Lantinga
 *
 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
 * format, encoder and blitter.
 *
 * Many thanks to Xark and johns for hints, benchmarks and useful comments
 * leading to this code.
 *
 * Welcome to Macro Mayhem.
 */
389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * The encoding translates the image data to a stream of segments of the form
 *
 * <skip> <run> <data>
 *
 * where <skip> is the number of transparent pixels to skip,
 *       <run>  is the number of opaque pixels to blit,
 * and   <data> are the pixels themselves.
 *
 * This basic structure is used both for colorkeyed surfaces, used for simple
 * binary transparency and for per-surface alpha blending, and for surfaces
 * with per-pixel alpha. The details differ, however:
 *
 * Encoding of colorkeyed surfaces:
 *
 *   Encoded pixels always have the same format as the target surface.
 *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
 *   where they are 16 bit. This makes the pixel data aligned at all times.
 *   Segments never wrap around from one scan line to the next.
 *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of a line.
 *
 * Encoding of surfaces with per-pixel alpha:
 *
 *   The sequence begins with a struct RLEDestFormat describing the target
 *   pixel format, to provide reliable un-encoding.
 *
 *   Each scan line is encoded twice: First all completely opaque pixels,
 *   encoded in the target format as described above, and then all
 *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
 *   in the following 32-bit format:
 *
 *   For 32-bit targets, each pixel has the target RGB format but with
 *   the alpha value occupying the highest 8 bits. The <skip> and <run>
 *   counts are 16 bit.
 *
 *   For 16-bit targets, each pixel has the target RGB format, but with
 *   the middle component (usually green) shifted 16 steps to the left,
 *   and the hole filled with the 5 most significant bits of the alpha value.
 *   i.e. if the target has the format         rrrrrggggggbbbbb,
 *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
 *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
 *   for the translucent lines. Two padding bytes may be inserted
 *   before each translucent line to keep them 32-bit aligned.
 *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of an opaque line.
 */
889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_video.h"
909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_sysvideo.h"
919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_blit.h"
929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_RLEaccel_c.h"
939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/* Force MMX to 0; this blows up on almost every major compiler now. --ryan. */
/*
 * The leading "0 &&" makes this condition always false, so MMX_ASMBLIT is
 * never defined here and the MMX blitters below are compiled out.
 */
#if 0 && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
#define MMX_ASMBLIT
#endif
989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifdef MMX_ASMBLIT
1009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "mmx.h"
1019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#include "SDL_cpuinfo.h"
1029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
1039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Fallback MAX/MIN, defined only when no earlier header provided them.
 * Each argument is expanded twice, so do not pass expressions with side
 * effects (e.g. MAX(i++, j)).
 */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
1109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
 *
 * When bpp is 4 the pixel count is handed straight to SDL_memcpy4;
 * for every other depth SDL_memcpy is called with len * bpp bytes.
 * `bpp` is expanded more than once, so it should be free of side effects.
 */
#define PIXEL_COPY(to, from, len, bpp) \
do { \
    if ((bpp) == 4) { \
        SDL_memcpy4(to, from, (size_t)(len)); \
    } else { \
        SDL_memcpy(to, from, (size_t)(len) * (bpp)); \
    } \
} while(0)
1199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Various colorkey blit methods, for opaque and per-surface alpha
 */

/*
 * Opaque run: a straight pixel copy through PIXEL_COPY.  `alpha` is part
 * of the parameter list (the same list the ALPHA_BLIT* macros take) but
 * is ignored here.
 */
#define OPAQUE_BLIT(to, from, length, bpp, alpha) \
    PIXEL_COPY(to, from, length, bpp)
1269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#ifdef MMX_ASMBLIT
1289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Constant-alpha blend of `length` 32-bit pixels from `from` onto `to`,
 * written with the MMX helper macros (movd_m2r, pand_r2r, ...).
 *
 * Bytes are widened to 16-bit lanes and each lane is computed as
 *     d + (((s - d) * alpha) >> 8)
 * before being packed back; the top byte of every stored pixel is cleared.
 * An odd leading pixel is blended on its own, the remaining pixels are
 * blended two per loop pass.  `bpp` is not used.
 *
 * The expansion declares locals named srcp, dstp and i, so the arguments
 * must not refer to identifiers with those names.
 */
#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \
    do { \
        Uint32 *srcp = (Uint32 *)(from); \
        Uint32 *dstp = (Uint32 *)(to); \
        int i = 0x00FF00FF; \
        movd_m2r(*(&i), mm3); \
        punpckldq_r2r(mm3, mm3); /* mm3 = low-byte mask for each 16-bit lane */ \
        i = 0xFF000000; \
        movd_m2r(*(&i), mm7); \
        punpckldq_r2r(mm7, mm7); /* mm7 = top-byte mask for both pixels */ \
        i = (alpha) | (alpha) << 16; \
        movd_m2r(*(&i), mm4); \
        punpckldq_r2r(mm4, mm4); /* mm4 = alpha in all four lanes */ \
        pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \
        pxor_r2r(mm7, mm5); /* make clear alpha mask */ \
        i = (length); \
        if(i & 1) { \
            /* odd pixel count: blend one pixel first */ \
            movd_m2r((*srcp), mm1); /* src -> mm1 */ \
            punpcklbw_r2r(mm1, mm1); \
            pand_r2r(mm3, mm1); \
            movd_m2r((*dstp), mm2); /* dst -> mm2 */ \
            punpcklbw_r2r(mm2, mm2); \
            pand_r2r(mm3, mm2); \
            psubw_r2r(mm2, mm1); \
            pmullw_r2r(mm4, mm1); \
            psrlw_i2r(8, mm1); \
            paddw_r2r(mm1, mm2); \
            pand_r2r(mm3, mm2); \
            packuswb_r2r(mm2, mm2); \
            pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
            movd_r2m(mm2, *dstp); \
            ++srcp; \
            ++dstp; \
            i--; \
        } \
        /* two pixels per pass: i drops by 2 (--i here plus i-- in the body) */ \
        for(; i > 0; --i) { \
            movq_m2r((*srcp), mm0); \
            movq_r2r(mm0, mm1); \
            punpcklbw_r2r(mm0, mm0); \
            movq_m2r((*dstp), mm2); \
            punpckhbw_r2r(mm1, mm1); \
            movq_r2r(mm2, mm6); \
            pand_r2r(mm3, mm0); \
            punpcklbw_r2r(mm2, mm2); \
            pand_r2r(mm3, mm1); \
            punpckhbw_r2r(mm6, mm6); \
            pand_r2r(mm3, mm2); \
            psubw_r2r(mm2, mm0); \
            pmullw_r2r(mm4, mm0); \
            pand_r2r(mm3, mm6); \
            psubw_r2r(mm6, mm1); \
            pmullw_r2r(mm4, mm1); \
            psrlw_i2r(8, mm0); \
            paddw_r2r(mm0, mm2); \
            psrlw_i2r(8, mm1); \
            paddw_r2r(mm1, mm6); \
            pand_r2r(mm3, mm2); \
            pand_r2r(mm3, mm6); \
            packuswb_r2r(mm2, mm2); \
            packuswb_r2r(mm6, mm6); \
            psrlq_i2r(32, mm2); \
            psllq_i2r(32, mm6); \
            por_r2r(mm6, mm2); \
            pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \
            movq_r2m(mm2, *dstp); \
            srcp += 2; \
            dstp += 2; \
            i--; \
        } \
        emms(); \
    } while(0)
2009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Constant-alpha blend of `length` 16-bit pixels from `from` onto `to`,
 * using the field masks 0xF800 / 0x07E0 / 0x001F, written with the MMX
 * helper macros.
 *
 * The first (length & 3) pixels are blended one at a time in plain C;
 * the rest are blended four per loop pass with MMX, one bit-field at a
 * time.  `bpp` is not used.
 *
 * NOTE: `alpha` must be a modifiable lvalue -- its three low bits are
 * cleared in place (alpha &= ~7) and the caller's variable keeps that
 * value afterwards.  The expansion declares locals named i, n, srcp, dstp
 * and ALPHA, so the arguments must not refer to identifiers with those
 * names.
 */
#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \
    do { \
        int i, n = 0; \
        Uint16 *srcp = (Uint16 *)(from); \
        Uint16 *dstp = (Uint16 *)(to); \
        Uint32 ALPHA = 0xF800; \
        movd_m2r(*(&ALPHA), mm1); \
        punpcklwd_r2r(mm1, mm1); \
        punpcklwd_r2r(mm1, mm1); /* mm1 = 0xF800 in all four lanes */ \
        ALPHA = 0x07E0; \
        movd_m2r(*(&ALPHA), mm4); \
        punpcklwd_r2r(mm4, mm4); \
        punpcklwd_r2r(mm4, mm4); /* mm4 = 0x07E0 in all four lanes */ \
        ALPHA = 0x001F; \
        movd_m2r(*(&ALPHA), mm7); \
        punpcklwd_r2r(mm7, mm7); \
        punpcklwd_r2r(mm7, mm7); /* mm7 = 0x001F in all four lanes */ \
        alpha &= ~(1+2+4); /* writes back to the caller's variable */ \
        i = (Uint32)alpha | (Uint32)alpha << 16; \
        movd_m2r(*(&i), mm0); \
        punpckldq_r2r(mm0, mm0); /* mm0 = alpha in all four lanes */ \
        ALPHA = alpha >> 3; /* 5-bit alpha for the scalar pixels */ \
        i = ((int)(length) & 3); \
        /* scalar blend of the leading (length & 3) pixels */ \
        for(; i > 0; --i) { \
            Uint32 s = *srcp++; \
            Uint32 d = *dstp; \
            s = (s | s << 16) & 0x07e0f81f; \
            d = (d | d << 16) & 0x07e0f81f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x07e0f81f; \
            *dstp++ = d | d >> 16; \
            n++; \
        } \
        i = (int)(length) - n; \
        /* four pixels per pass: i drops by 4 (--i here plus i -= 3 below) */ \
        for(; i > 0; --i) { \
            movq_m2r((*dstp), mm3); \
            movq_m2r((*srcp), mm2); \
            /* 0xF800 field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm1 , mm5); \
            psrlq_i2r(11, mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm1 , mm6); \
            psrlq_i2r(11, mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            psllq_i2r(11, mm6); \
            pand_r2r(mm1, mm6); \
            movq_r2r(mm4, mm5); \
            por_r2r(mm7, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            /* 0x07E0 field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm4 , mm5); \
            psrlq_i2r(5, mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm4 , mm6); \
            psrlq_i2r(5, mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            psllq_i2r(5, mm6); \
            pand_r2r(mm4, mm6); \
            movq_r2r(mm1, mm5); \
            por_r2r(mm7, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            /* 0x001F field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm7 , mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm7 , mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            pand_r2r(mm7, mm6); \
            movq_r2r(mm1, mm5); \
            por_r2r(mm4, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            movq_r2m(mm3, *dstp); \
            srcp += 4; \
            dstp += 4; \
            i -= 3; \
        } \
        emms(); \
    } while(0)
2909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Constant-alpha blend of `length` 16-bit pixels from `from` onto `to`,
 * using the field masks 0x7C00 / 0x03E0 / 0x001F, written with the MMX
 * helper macros.
 *
 * The first (length & 3) pixels are blended one at a time in plain C;
 * the rest are blended four per loop pass with MMX, one bit-field at a
 * time.  `bpp` is not used.
 *
 * NOTE: `alpha` must be a modifiable lvalue -- its three low bits are
 * cleared in place (alpha &= ~7) and the caller's variable keeps that
 * value afterwards.  The expansion declares locals named i, n, srcp, dstp
 * and ALPHA, so the arguments must not refer to identifiers with those
 * names.
 */
#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \
    do { \
        int i, n = 0; \
        Uint16 *srcp = (Uint16 *)(from); \
        Uint16 *dstp = (Uint16 *)(to); \
        Uint32 ALPHA = 0x7C00; \
        movd_m2r(*(&ALPHA), mm1); \
        punpcklwd_r2r(mm1, mm1); \
        punpcklwd_r2r(mm1, mm1); /* mm1 = 0x7C00 in all four lanes */ \
        ALPHA = 0x03E0; \
        movd_m2r(*(&ALPHA), mm4); \
        punpcklwd_r2r(mm4, mm4); \
        punpcklwd_r2r(mm4, mm4); /* mm4 = 0x03E0 in all four lanes */ \
        ALPHA = 0x001F; \
        movd_m2r(*(&ALPHA), mm7); \
        punpcklwd_r2r(mm7, mm7); \
        punpcklwd_r2r(mm7, mm7); /* mm7 = 0x001F in all four lanes */ \
        alpha &= ~(1+2+4); /* writes back to the caller's variable */ \
        i = (Uint32)alpha | (Uint32)alpha << 16; \
        movd_m2r(*(&i), mm0); \
        punpckldq_r2r(mm0, mm0); /* mm0 = alpha in all four lanes */ \
        i = ((int)(length) & 3); \
        ALPHA = alpha >> 3; /* 5-bit alpha for the scalar pixels */ \
        /* scalar blend of the leading (length & 3) pixels */ \
        for(; i > 0; --i) { \
            Uint32 s = *srcp++; \
            Uint32 d = *dstp; \
            s = (s | s << 16) & 0x03e07c1f; \
            d = (d | d << 16) & 0x03e07c1f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x03e07c1f; \
            *dstp++ = d | d >> 16; \
            n++; \
        } \
        i = (int)(length) - n; \
        /* four pixels per pass: i drops by 4 (--i here plus i -= 3 below) */ \
        for(; i > 0; --i) { \
            movq_m2r((*dstp), mm3); \
            movq_m2r((*srcp), mm2); \
            /* 0x7C00 field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm1 , mm5); \
            psrlq_i2r(10, mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm1 , mm6); \
            psrlq_i2r(10, mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            psllq_i2r(10, mm6); \
            pand_r2r(mm1, mm6); \
            movq_r2r(mm4, mm5); \
            por_r2r(mm7, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            /* 0x03E0 field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm4 , mm5); \
            psrlq_i2r(5, mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm4 , mm6); \
            psrlq_i2r(5, mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            psllq_i2r(5, mm6); \
            pand_r2r(mm4, mm6); \
            movq_r2r(mm1, mm5); \
            por_r2r(mm7, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            /* 0x001F field */ \
            movq_r2r(mm2, mm5); \
            pand_r2r(mm7 , mm5); \
            movq_r2r(mm3, mm6); \
            pand_r2r(mm7 , mm6); \
            psubw_r2r(mm6, mm5); \
            pmullw_r2r(mm0, mm5); \
            psrlw_i2r(8, mm5); \
            paddw_r2r(mm5, mm6); \
            pand_r2r(mm7, mm6); \
            movq_r2r(mm1, mm5); \
            por_r2r(mm4, mm5); \
            pand_r2r(mm5, mm3); \
            por_r2r(mm6, mm3); \
            movq_r2m(mm3, *dstp); \
            srcp += 4; \
            dstp += 4; \
            i -= 3; \
        } \
        emms(); \
    } while(0)
3809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
3819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
3829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Blend a run of 32bpp pixels of the form 0x00rrggbb onto the destination
 * with one constant alpha.
 *
 * If we treat the middle component separately, we can process the two
 * remaining ones in parallel. This is safe to do because of the gap to the
 * left of each component, so the bits from the multiplication don't collide.
 * This can be used for any RGB permutation of course.
 *
 *   to, from - destination and source pixel runs, accessed as Uint32
 *   length   - number of pixels to blend
 *   bpp      - not used by this blitter
 *   alpha    - weight of the source pixel in 1/256 units
 *
 * Only the low 24 bits are blended; the top byte of every pixel written is
 * zero.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint32 *src = (Uint32 *)(from); \
        Uint32 *dst = (Uint32 *)(to); \
        for(i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* the two outer components, blended with one multiply */ \
            Uint32 s1 = s & 0xff00ff; \
            Uint32 d1 = d & 0xff00ff; \
            d1 = (d1 + ((s1 - d1) * (alpha) >> 8)) & 0xff00ff; \
            /* the middle component on its own */ \
            s &= 0xff00; \
            d &= 0xff00; \
            d = (d + ((s - d) * (alpha) >> 8)) & 0xff00; \
            *dst++ = d1 | d; \
        } \
    } while(0)
4079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Blend a run of 16bpp 5-6-5 pixels onto the destination with one constant
 * alpha.
 *
 * For 16bpp pixels we can go a step further: put the middle component
 * in the high 16 bits of a 32 bit word, and process all three RGB
 * components at the same time. Since the smallest gap is here just
 * 5 bits, we have to scale alpha down to 5 bits as well.
 *
 *   to, from - destination and source pixel runs, accessed as Uint16
 *   length   - number of pixels to blend
 *   bpp      - not used by this blitter
 *   alpha    - weight of the source pixel in 1/256 units; only its top
 *              five bits (alpha >> 3) take part in the blend
 */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        Uint32 ALPHA = (alpha) >> 3; \
        for(i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* middle component -> high half, outer two stay in low half */ \
            s = (s | s << 16) & 0x07e0f81f; \
            d = (d | d << 16) & 0x07e0f81f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x07e0f81f; \
            /* fold the high half back into a 16 bit pixel */ \
            *dst++ = (Uint16)(d | d >> 16); \
        } \
    } while(0)
4309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Blend a run of 16bpp 5-5-5 pixels onto the destination with one constant
 * alpha. The middle component is moved to the high half of a 32 bit word
 * (mask 0x03e07c1f) so that all three components are blended by a single
 * multiply; alpha is reduced to five bits to fit the 5 bit gaps.
 *
 *   to, from - destination and source pixel runs, accessed as Uint16
 *   length   - number of pixels to blend
 *   bpp      - not used by this blitter
 *   alpha    - weight of the source pixel in 1/256 units; only its top
 *              five bits (alpha >> 3) take part in the blend
 *
 * Bit 15 of every pixel written is zero.
 */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        Uint32 ALPHA = (alpha) >> 3; \
        for(i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* middle component -> high half, outer two stay in low half */ \
            s = (s | s << 16) & 0x03e07c1f; \
            d = (d | d << 16) & 0x03e07c1f; \
            d += (s - d) * ALPHA >> 5; \
            d &= 0x03e07c1f; \
            /* fold the high half back into a 16 bit pixel */ \
            *dst++ = (Uint16)(d | d >> 16); \
        } \
    } while(0)
4479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * The general slow catch-all function, for remaining depths and formats.
 *
 * Each pixel is loaded according to bpp (2, 3 or 4 bytes), split into its
 * components with RGB_FROM_PIXEL, blended one component at a time, packed
 * again with PIXEL_FROM_RGB and stored back.
 *
 *   to, from - destination and source pixel runs (Uint8 pointers)
 *   length   - number of pixels to blend
 *   bpp      - bytes per pixel; 2, 3 and 4 are handled. For any other
 *              value nothing is stored and the pointers simply advance.
 *   alpha    - weight of the source pixel in 1/256 units
 *
 * The macro also reads a variable named `fmt` from the scope it is expanded
 * in and hands it to RGB_FROM_PIXEL / PIXEL_FROM_RGB.
 *
 * NOTE(review): the component blend relies on unsigned wrap-around when the
 * source component is smaller than the destination one, which leaves bits
 * above the component in rd/gd/bd; this assumes PIXEL_FROM_RGB (or the final
 * store) discards them - confirm against its definition.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint8 *src = (from); \
        Uint8 *dst = (to); \
        for(i = 0; i < (int)(length); i++) { \
            /* initialised so an unhandled bpp never reads garbage */ \
            Uint32 s = 0, d = 0; \
            unsigned rs, gs, bs, rd, gd, bd; \
            switch(bpp) { \
            case 2: \
                s = *(Uint16 *)src; \
                d = *(Uint16 *)dst; \
                break; \
            case 3: \
                /* 24 bit pixels are assembled byte by byte */ \
                if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                    s = (src[0] << 16) | (src[1] << 8) | src[2]; \
                    d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
                } else { \
                    s = (src[2] << 16) | (src[1] << 8) | src[0]; \
                    d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
                } \
                break; \
            case 4: \
                s = *(Uint32 *)src; \
                d = *(Uint32 *)dst; \
                break; \
            } \
            RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \
            RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \
            rd += (rs - rd) * (alpha) >> 8; \
            gd += (gs - gd) * (alpha) >> 8; \
            bd += (bs - bd) * (alpha) >> 8; \
            PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \
            switch(bpp) { \
            case 2: \
                *(Uint16 *)dst = (Uint16)d; \
                break; \
            case 3: \
                if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \
                    dst[0] = (Uint8)(d >> 16); \
                    dst[1] = (Uint8)(d >> 8); \
                    dst[2] = (Uint8)(d); \
                } else { \
                    dst[0] = (Uint8)d; \
                    dst[1] = (Uint8)(d >> 8); \
                    dst[2] = (Uint8)(d >> 16); \
                } \
                break; \
            case 4: \
                *(Uint32 *)dst = d; \
                break; \
            } \
            src += (bpp); \
            dst += (bpp); \
        } \
    } while(0)
5079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
#ifdef MMX_ASMBLIT

/*
 * MMX variant of the 32bpp 50% blend: two pixels are handled per loop
 * iteration with 64 bit loads and stores.
 *
 * Per pixel the result is
 *     (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) + (s & d & 0x00010101)
 * i.e. the low bit of every component is dropped before the add so the
 * components cannot carry into each other, and the shared low bits are
 * added back afterwards.
 *
 *   to, from - destination and source pixel runs, accessed as Uint32
 *   length   - number of pixels to blend
 *   bpp      - not used by this blitter
 *   alpha    - not used by this blitter
 *
 * An odd leading pixel is blended with plain C so that the MMX loop always
 * sees an even count.
 */
#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \
    do { \
        Uint32 *srcp = (Uint32 *)(from); \
        Uint32 *dstp = (Uint32 *)(to); \
        int i = 0x00fefefe; \
        movd_m2r(*(&i), mm4); \
        punpckldq_r2r(mm4, mm4); /* mm4: 0x00fefefe mask for both pixels */ \
        i = 0x00010101; \
        movd_m2r(*(&i), mm3); \
        punpckldq_r2r(mm3, mm3); /* mm3: 0x00010101 mask for both pixels */ \
        i = (int)(length); \
        if( i & 1 ) { \
            /* odd pixel count: blend the first pixel without MMX */ \
            Uint32 s = *srcp++; \
            Uint32 d = *dstp; \
            *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
                      + (s & d & 0x00010101); \
            i--; \
        } \
        /* i is even from here on; each pass consumes two pixels */ \
        for(; i > 0; i -= 2) { \
            movq_m2r((*dstp), mm2); /* dst -> mm2 */ \
            movq_r2r(mm2, mm6); /* dst -> mm6 */ \
            movq_m2r((*srcp), mm1); /* src -> mm1 */ \
            movq_r2r(mm1, mm5); /* src -> mm5 */ \
            pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \
            pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \
            paddd_r2r(mm6, mm5); /* (src & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \
            psrld_i2r(1, mm5); /* halve the sum */ \
            pand_r2r(mm1, mm2); /* s & d -> mm2 */ \
            pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \
            paddd_r2r(mm5, mm2); /* add the shared low bits back */ \
            movq_r2m(mm2, (*dstp)); \
            dstp += 2; \
            srcp += 2; \
        } \
        emms(); \
    } while(0)

#endif
5499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * Special case: 50% alpha (alpha=128)
 * This is treated specially because it can be optimized very well, and
 * since it is good for many cases of semi-translucency.
 * The theory is to do all three components at the same time:
 * First zero the lowest bit of each component, which gives us room to
 * add them. Then shift right and add the sum of the lowest bits.
 *
 *   to, from - destination and source pixel runs, accessed as Uint32
 *   length   - number of pixels to blend
 *   bpp      - not used by this blitter
 *   alpha    - not used by this blitter (the 50% weight is built in)
 *
 * Only the low 24 bits are blended; the top byte of every pixel written is
 * zero.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) \
    do { \
        int i; \
        Uint32 *src = (Uint32 *)(from); \
        Uint32 *dst = (Uint32 *)(to); \
        for(i = 0; i < (int)(length); i++) { \
            Uint32 s = *src++; \
            Uint32 d = *dst; \
            /* halved sum of the high 7 bits, plus the low bit both share */ \
            *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
                     + (s & d & 0x00010101); \
        } \
    } while(0)
5709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * For 16bpp, we can actually blend two pixels in parallel, if we take
 * care to shift before we add, not after.
 */

/*
 * helper: blend a single 16 bit pixel at 50%
 *
 *   dst, src - Uint16 pointer variables; both are advanced by one pixel
 *   mask     - 16 bit mask with the lowest bit of every component cleared
 *
 * The masked halves are added and shifted, then the low bits that source
 * and destination have in common (selected by ~mask) are added back.
 */
#define BLEND16_50(dst, src, mask) \
    do { \
        Uint32 s = *(src)++; \
        Uint32 d = *(dst); \
        *(dst)++ = (Uint16)((((s & (mask)) + (d & (mask))) >> 1) + \
                            (s & d & (~(mask) & 0xffff))); \
    } while(0)
5849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * basic 16bpp blender. mask is the pixels to keep when adding.
 *
 *   to, from - destination and source pixel runs, accessed as Uint16
 *   length   - number of pixels to blend; 0 is a no-op
 *   bpp      - not used by this blitter
 *   alpha    - not used by this blitter (the 50% weight is built in)
 *   mask     - 16 bit mask with the lowest bit of every component cleared
 *
 * When source and destination have the same alignment relative to a 4 byte
 * boundary, pixels are blended two at a time through 32 bit accesses, with
 * single-pixel BLEND16_50 steps for an unaligned first and a left-over last
 * pixel. Otherwise every pixel goes through BLEND16_50.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \
    do { \
        unsigned n = (length); \
        Uint16 *src = (Uint16 *)(from); \
        Uint16 *dst = (Uint16 *)(to); \
        /* mask for two pixels at once; built in Uint32 because shifting */ \
        /* an int constant such as 0xf7de left by 16 overflows signed int */ \
        const Uint32 mask32 = (Uint32)(mask) | ((Uint32)(mask) << 16); \
        if(((uintptr_t)src ^ (uintptr_t)dst) & 3) { \
            /* source and destination not in phase, blit one by one */ \
            while(n--) \
                BLEND16_50(dst, src, mask); \
        } else { \
            if(n && ((uintptr_t)src & 3)) { \
                /* first odd pixel; n is checked so that an empty run */ \
                /* cannot wrap the unsigned counter */ \
                BLEND16_50(dst, src, mask); \
                n--; \
            } \
            for(; n > 1; n -= 2) { \
                Uint32 s = *(Uint32 *)src; \
                Uint32 d = *(Uint32 *)dst; \
                /* shift before adding so the two pixels cannot carry */ \
                /* into each other */ \
                *(Uint32 *)dst = ((s & mask32) >> 1) \
                               + ((d & mask32) >> 1) \
                               + (s & d & ~mask32); \
                src += 2; \
                dst += 2; \
            } \
            if(n) \
                BLEND16_50(dst, src, mask); /* last odd pixel */ \
        } \
    } while(0)
6149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * 50% blend for 5-6-5 pixels: ALPHA_BLIT16_50 with mask 0xf7de, which has
 * bits 0, 5 and 11 (the lowest bit of each component) cleared.
 */
#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
6179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * 50% blend for 5-5-5 pixels: ALPHA_BLIT16_50 with mask 0xfbde, which has
 * bits 0, 5 and 10 (the lowest bit of each component) cleared.
 */
#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
6209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * CHOOSE_BLIT_MMX(blitter, bpp, Type, mmx_blit, c_blit)
 *
 * Helper for CHOOSE_BLIT. When MMX_ASMBLIT is defined it expands blitter
 * with mmx_blit if SDL_HasMMX() reports support at run time and with c_blit
 * otherwise; without MMX_ASMBLIT it always expands blitter with c_blit and
 * mmx_blit is ignored.
 */
#ifdef MMX_ASMBLIT

#define CHOOSE_BLIT_MMX(blitter, bpp, Type, mmx_blit, c_blit) \
    do { \
        if(SDL_HasMMX()) \
            blitter(bpp, Type, mmx_blit); \
        else \
            blitter(bpp, Type, c_blit); \
    } while(0)

#else

#define CHOOSE_BLIT_MMX(blitter, bpp, Type, mmx_blit, c_blit) \
    blitter(bpp, Type, c_blit)

#endif

/*
 * CHOOSE_BLIT(blitter, alpha, fmt)
 *
 * Run blitter(bpp, Type, do_blit) with the row blitter do_blit that matches
 * the pixel format fmt and the blend factor alpha:
 *
 *   alpha == 255                 OPAQUE_BLIT, for 1, 2, 3 and 4 byte pixels
 *   1 byte pixels                nothing (no 8bpp alpha blitting)
 *   2 byte pixels, 5-6-5         ALPHA_BLIT16_565_50 when alpha == 128,
 *                                else ALPHA_BLIT16_565 (MMX variant if
 *                                available)
 *   2 byte pixels, 5-5-5         ALPHA_BLIT16_555_50 when alpha == 128,
 *                                else ALPHA_BLIT16_555 (MMX variant if
 *                                available)
 *   any other 2 byte format      ALPHA_BLIT_ANY
 *   3 byte pixels                ALPHA_BLIT_ANY
 *   4 byte pixels, 8-8-8 in the  ALPHA_BLIT32_888_50 when alpha == 128,
 *   low 24 bits                  else ALPHA_BLIT32_888 (MMX variants if
 *                                available)
 *   any other 4 byte format      ALPHA_BLIT_ANY
 *
 * Type is Uint8 for 1-3 byte pixels and Uint16 for 4 byte pixels and is
 * passed through to blitter unchanged.
 *
 * The expansion defines the label general16, so the macro can be used at
 * most once per function. The label lets every unrecognised 16 bit format
 * share the single blitter(2, Uint8, ALPHA_BLIT_ANY) expansion.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt) \
    do { \
        if((alpha) == 255) { \
            switch((fmt)->BytesPerPixel) { \
            case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \
            case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \
            case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \
            case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \
            } \
        } else { \
            switch((fmt)->BytesPerPixel) { \
            case 1: \
                /* No 8bpp alpha blitting */ \
                break; \
 \
            case 2: \
                switch((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) { \
                case 0xffff: \
                    /* 16 significant bits: 5-6-5 if some component */ \
                    /* is the 6 bit one in the middle */ \
                    if((fmt)->Gmask == 0x07e0 \
                       || (fmt)->Rmask == 0x07e0 \
                       || (fmt)->Bmask == 0x07e0) { \
                        if((alpha) == 128) { \
                            blitter(2, Uint8, ALPHA_BLIT16_565_50); \
                        } else { \
                            CHOOSE_BLIT_MMX(blitter, 2, Uint8, \
                                            ALPHA_BLIT16_565MMX, \
                                            ALPHA_BLIT16_565); \
                        } \
                    } else \
                        goto general16; \
                    break; \
 \
                case 0x7fff: \
                    /* 15 significant bits: 5-5-5 if some component */ \
                    /* sits in the middle */ \
                    if((fmt)->Gmask == 0x03e0 \
                       || (fmt)->Rmask == 0x03e0 \
                       || (fmt)->Bmask == 0x03e0) { \
                        if((alpha) == 128) { \
                            blitter(2, Uint8, ALPHA_BLIT16_555_50); \
                        } else { \
                            CHOOSE_BLIT_MMX(blitter, 2, Uint8, \
                                            ALPHA_BLIT16_555MMX, \
                                            ALPHA_BLIT16_555); \
                        } \
                        break; \
                    } \
                    /* fallthrough */ \
 \
                default: \
                general16: \
                    blitter(2, Uint8, ALPHA_BLIT_ANY); \
                } \
                break; \
 \
            case 3: \
                blitter(3, Uint8, ALPHA_BLIT_ANY); \
                break; \
 \
            case 4: \
                if(((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) == 0x00ffffff \
                   && ((fmt)->Gmask == 0xff00 || (fmt)->Rmask == 0xff00 \
                       || (fmt)->Bmask == 0xff00)) { \
                    if((alpha) == 128) { \
                        CHOOSE_BLIT_MMX(blitter, 4, Uint16, \
                                        ALPHA_BLIT32_888_50MMX, \
                                        ALPHA_BLIT32_888_50); \
                    } else { \
                        CHOOSE_BLIT_MMX(blitter, 4, Uint16, \
                                        ALPHA_BLIT32_888MMX, \
                                        ALPHA_BLIT32_888); \
                    } \
                } else \
                    blitter(4, Uint16, ALPHA_BLIT_ANY); \
                break; \
            } \
        } \
    } while(0)
7789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
7799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
7809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * This takes care of the case when the surface is clipped on the left and/or
7819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * right. Top clipping has already been taken care of.
7829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */
7839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
7849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha)
7859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
7869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_PixelFormat *fmt = dst->format;
7879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
7889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RLECLIPBLIT(bpp, Type, do_blit)					   \
7899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    do {								   \
7909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int linecount = srcrect->h;					   \
7919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int ofs = 0;							   \
7929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int left = srcrect->x;						   \
7939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int right = left + srcrect->w;					   \
7949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dstbuf -= left * bpp;						   \
7959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	for(;;) {							   \
7969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int run;							   \
7979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ofs += *(Type *)srcbuf;					   \
7989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    run = ((Type *)srcbuf)[1];					   \
7999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    srcbuf += 2 * sizeof(Type);					   \
8009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(run) {							   \
8019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* clip to left and right borders */			   \
8029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(ofs < right) {					   \
8039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int start = 0;					   \
8049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int len = run;					   \
8059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int startcol;					   \
8069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(left - ofs > 0) {				   \
8079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			start = left - ofs;				   \
8089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			len -= start;					   \
8099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			if(len <= 0)					   \
8109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    goto nocopy ## bpp ## do_blit;		   \
8119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							   \
8129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    startcol = ofs + start;				   \
8139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(len > right - startcol)				   \
8149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			len = right - startcol;				   \
8159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
8169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    len, bpp, alpha);				   \
8179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}							   \
8189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    nocopy ## bpp ## do_blit:					   \
8199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += run * bpp;					   \
8209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += run;						   \
8219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else if(!ofs)						   \
8229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		break;							   \
8239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(ofs == w) {						   \
8249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs = 0;						   \
8259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		dstbuf += dst->pitch;					   \
8269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(!--linecount)					   \
8279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    break;						   \
8289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }								   \
8299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}								   \
8309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    } while(0)
8319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);
8339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef RLECLIPBLIT
8359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
8379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* blit a colorkeyed RLE surface */
8409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallint SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect,
8419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_Surface *dst, SDL_Rect *dstrect)
8429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
8439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8 *dstbuf;
8449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8 *srcbuf;
8459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int x, y;
8469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int w = src->w;
8479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned alpha;
8489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Lock the destination if necessary */
8509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( SDL_MUSTLOCK(dst) ) {
8519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if ( SDL_LockSurface(dst) < 0 ) {
8529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			return(-1);
8539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
8549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
8559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Set up the source and destination pointers */
8579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	x = dstrect->x;
8589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	y = dstrect->y;
8599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dstbuf = (Uint8 *)dst->pixels
8609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	         + y * dst->pitch + x * src->format->BytesPerPixel;
8619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	srcbuf = (Uint8 *)src->map->sw_data->aux_data;
8629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	{
8649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* skip lines at the top if neccessary */
8659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int vskip = srcrect->y;
8669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int ofs = 0;
8679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(vskip) {
8689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RLESKIP(bpp, Type)			\
8709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		for(;;) {			\
8719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int run;			\
8729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += *(Type *)srcbuf;	\
8739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run = ((Type *)srcbuf)[1];	\
8749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += sizeof(Type) * 2;	\
8759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(run) {			\
8769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += run * bpp;	\
8779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += run;		\
8789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } else if(!ofs)		\
8799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			goto done;		\
8809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(ofs == w) {		\
8819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs = 0;		\
8829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			if(!--vskip)		\
8839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    break;		\
8849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }				\
8859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
8869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		switch(src->format->BytesPerPixel) {
8889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		case 1: RLESKIP(1, Uint8); break;
8899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		case 2: RLESKIP(2, Uint8); break;
8909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		case 3: RLESKIP(3, Uint8); break;
8919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		case 4: RLESKIP(4, Uint16); break;
8929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
8939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef RLESKIP
8959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }
8979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
8989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
8999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	alpha = (src->flags & SDL_SRCALPHA) == SDL_SRCALPHA
9009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	        ? src->format->alpha : 255;
9019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* if left or right edge clipping needed, call clip blit */
9029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( srcrect->x || srcrect->w != src->w ) {
9039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
9049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} else {
9059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    SDL_PixelFormat *fmt = src->format;
9069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RLEBLIT(bpp, Type, do_blit)					      \
9089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {							      \
9099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int linecount = srcrect->h;				      \
9109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int ofs = 0;						      \
9119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		for(;;) {						      \
9129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    unsigned run;					      \
9139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += *(Type *)srcbuf;				      \
9149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run = ((Type *)srcbuf)[1];				      \
9159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += 2 * sizeof(Type);				      \
9169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(run) {						      \
9179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
9189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += run * bpp;				      \
9199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += run;					      \
9209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } else if(!ofs)					      \
9219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			break;						      \
9229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(ofs == w) {					      \
9239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs = 0;					      \
9249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			dstbuf += dst->pitch;				      \
9259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			if(!--linecount)				      \
9269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    break;					      \
9279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							      \
9289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}							      \
9299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(0)
9309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    CHOOSE_BLIT(RLEBLIT, alpha, fmt);
9329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef RLEBLIT
9349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
9359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Halldone:
9379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Unlock the destination if necessary */
9389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( SDL_MUSTLOCK(dst) ) {
9399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_UnlockSurface(dst);
9409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
9419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return(0);
9429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
9439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef OPAQUE_BLIT
9459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
9479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * Per-pixel blitting macros for translucent pixels:
9489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * These use the same techniques as the per-surface blitting macros
9499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */
9509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * For 32bpp pixels, we have made sure the alpha is stored in the top
 * 8 bits, so proceed as usual.
 *
 * src is the 32-bit source pixel (alpha = src >> 24), dst an lvalue holding
 * the destination pixel. The two outer 8-bit channels (mask 0xff00ff) are
 * blended with a single multiply, the middle channel (mask 0xff00) with a
 * second one; the arithmetic relies on unsigned wrap-around when the source
 * channel is smaller than the destination. The top 8 bits of the value
 * written to dst are always zero.
 */
#define BLIT_TRANSL_888(src, dst)                               \
    do {                                                        \
        Uint32 s = (src);                                       \
        Uint32 d = (dst);                                       \
        unsigned alpha = s >> 24;                               \
        Uint32 s1 = s & 0xff00ff;                               \
        Uint32 d1 = d & 0xff00ff;                               \
        d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;        \
        s &= 0xff00;                                            \
        d &= 0xff00;                                            \
        d = (d + ((s - d) * alpha >> 8)) & 0xff00;              \
        (dst) = d1 | d;                                         \
    } while(0)
9689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * For 16bpp pixels, the 5 most significant alpha bits are read from
 * bits 5-9 of the source word (mask 0x3e0). As before, we can process all
 * 3 RGB components at the same time.
 *
 * src is a 32-bit word whose colour bits sit under mask 0x07e0f81f; dst is
 * an lvalue holding a 16-bit 565 pixel. The destination is spread to the
 * same 32-bit layout (d | d << 16), blended with one multiply, and folded
 * back to 16 bits.
 */
#define BLIT_TRANSL_565(src, dst)               \
    do {                                        \
        Uint32 s = (src);                       \
        Uint32 d = (dst);                       \
        unsigned alpha = (s & 0x3e0) >> 5;      \
        s &= 0x07e0f81f;                        \
        d = (d | d << 16) & 0x07e0f81f;         \
        d += (s - d) * alpha >> 5;              \
        d &= 0x07e0f81f;                        \
        (dst) = (Uint16)(d | d >> 16);          \
    } while(0)
9849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * 555 variant of the 16bpp blend: same scheme with colour mask 0x03e07c1f.
 * The 5-bit alpha is again read from bits 5-9 of the source word
 * (mask 0x3e0); dst is an lvalue holding a 16-bit pixel, and bit 15 of the
 * value written back is always zero.
 */
#define BLIT_TRANSL_555(src, dst)               \
    do {                                        \
        Uint32 s = (src);                       \
        Uint32 d = (dst);                       \
        unsigned alpha = (s & 0x3e0) >> 5;      \
        s &= 0x03e07c1f;                        \
        d = (d | d << 16) & 0x03e07c1f;         \
        d += (s - d) * alpha >> 5;              \
        d &= 0x03e07c1f;                        \
        (dst) = (Uint16)(d | d >> 16);          \
    } while(0)
9969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
9979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* used to save the destination format in the encoding. Designed to be
9989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall   macro-compatible with SDL_PixelFormat but without the unneeded fields */
9999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Halltypedef struct {
10009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  BytesPerPixel;
10019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Rloss;
10029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Gloss;
10039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Bloss;
10049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Rshift;
10059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Gshift;
10069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Bshift;
10079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8  Ashift;
10089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 Rmask;
10099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 Gmask;
10109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 Bmask;
10119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 Amask;
10129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall} RLEDestFormat;
10139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
10149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* blit a pixel-alpha RLE surface clipped at the right and/or left edges */
10159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
10169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			     Uint8 *dstbuf, SDL_Rect *srcrect)
10179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
10189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_PixelFormat *df = dst->format;
10199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /*
10209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall     * clipped blitter: Ptype is the destination pixel type,
10219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall     * Ctype the translucent count type, and do_blend the macro
10229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall     * to blend one pixel.
10239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall     */
10249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
10259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    do {								  \
10269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int linecount = srcrect->h;					  \
10279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int left = srcrect->x;						  \
10289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int right = left + srcrect->w;					  \
10299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dstbuf -= left * sizeof(Ptype);					  \
10309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	do {								  \
10319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int ofs = 0;						  \
10329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* blit opaque pixels on one line */			  \
10339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {							  \
10349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		unsigned run;						  \
10359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += ((Ctype *)srcbuf)[0];				  \
10369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = ((Ctype *)srcbuf)[1];				  \
10379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += 2 * sizeof(Ctype);				  \
10389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(run) {						  \
10399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* clip to left and right borders */		  \
10409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int cofs = ofs;					  \
10419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int crun = run;					  \
10429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(left - cofs > 0) {				  \
10439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			crun -= left - cofs;				  \
10449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			cofs = left;					  \
10459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							  \
10469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(crun > right - cofs)				  \
10479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			crun = right - cofs;				  \
10489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(crun > 0)					  \
10499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
10509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
10519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall				   (unsigned)crun, sizeof(Ptype));	  \
10529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += run * sizeof(Ptype);			  \
10539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += run;						  \
10549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		} else if(!ofs)						  \
10559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    return;						  \
10569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(ofs < w);						  \
10579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* skip padding if necessary */				  \
10589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(sizeof(Ptype) == 2)					  \
10599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += (uintptr_t)srcbuf & 2;			  \
10609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* blit translucent pixels on the same line */		  \
10619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ofs = 0;							  \
10629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {							  \
10639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		unsigned run;						  \
10649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += ((Uint16 *)srcbuf)[0];				  \
10659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = ((Uint16 *)srcbuf)[1];				  \
10669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += 4;						  \
10679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(run) {						  \
10689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* clip to left and right borders */		  \
10699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int cofs = ofs;					  \
10709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    int crun = run;					  \
10719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(left - cofs > 0) {				  \
10729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			crun -= left - cofs;				  \
10739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			cofs = left;					  \
10749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							  \
10759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(crun > right - cofs)				  \
10769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			crun = right - cofs;				  \
10779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(crun > 0) {					  \
10789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
10799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
10809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			int i;						  \
10819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			for(i = 0; i < crun; i++)			  \
10829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    do_blend(src[i], dst[i]);			  \
10839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							  \
10849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += run * 4;					  \
10859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += run;						  \
10869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}							  \
10879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(ofs < w);						  \
10889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dstbuf += dst->pitch;					  \
10899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} while(--linecount);						  \
10909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    } while(0)
10919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
10929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    switch(df->BytesPerPixel) {
10939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    case 2:
10949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
10959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	   || df->Bmask == 0x07e0)
10969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
10979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	else
10989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
10999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	break;
11009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    case 4:
11019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
11029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	break;
11039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
11049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
11059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* blit a pixel-alpha RLE surface */
11079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallint SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect,
11089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		     SDL_Surface *dst, SDL_Rect *dstrect)
11099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
11109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int x, y;
11119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int w = src->w;
11129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint8 *srcbuf, *dstbuf;
11139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_PixelFormat *df = dst->format;
11149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* Lock the destination if necessary */
11169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if ( SDL_MUSTLOCK(dst) ) {
11179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( SDL_LockSurface(dst) < 0 ) {
11189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    return -1;
11199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
11209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
11219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    x = dstrect->x;
11239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    y = dstrect->y;
11249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    dstbuf = (Uint8 *)dst->pixels
11259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	     + y * dst->pitch + x * df->BytesPerPixel;
11269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat);
11279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    {
11299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* skip lines at the top if necessary */
11309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int vskip = srcrect->y;
11319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(vskip) {
11329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int ofs;
11339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(df->BytesPerPixel == 2) {
11349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* the 16/32 interleaved format */
11359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		do {
11369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* skip opaque line */
11379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs = 0;
11389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    do {
11399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			int run;
11409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += srcbuf[0];
11419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			run = srcbuf[1];
11429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += 2;
11439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			if(run) {
11449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    srcbuf += 2 * run;
11459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    ofs += run;
11469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			} else if(!ofs)
11479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    goto done;
11489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } while(ofs < w);
11499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* skip padding */
11519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += (uintptr_t)srcbuf & 2;
11529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* skip translucent line */
11549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs = 0;
11559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    do {
11569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			int run;
11579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += ((Uint16 *)srcbuf)[0];
11589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			run = ((Uint16 *)srcbuf)[1];
11599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += 4 * (run + 1);
11609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += run;
11619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } while(ofs < w);
11629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		} while(--vskip);
11639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else {
11649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* the 32/32 interleaved format */
11659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		vskip <<= 1;	/* opaque and translucent have same format */
11669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		do {
11679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs = 0;
11689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    do {
11699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			int run;
11709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += ((Uint16 *)srcbuf)[0];
11719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			run = ((Uint16 *)srcbuf)[1];
11729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += 4;
11739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			if(run) {
11749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    srcbuf += 4 * run;
11759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    ofs += run;
11769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			} else if(!ofs)
11779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    goto done;
11789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } while(ofs < w);
11799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		} while(--vskip);
11809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }
11819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
11829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
11839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* if left or right edge clipping needed, call clip blit */
11859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if(srcrect->x || srcrect->w != src->w) {
11869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
11879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    } else {
11889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
11899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/*
11909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 * non-clipped blitter. Ptype is the destination pixel type,
11919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 * Ctype the translucent count type, and do_blend the
11929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 * macro to blend one pixel.
11939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	 */
11949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
11959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	do {								 \
11969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int linecount = srcrect->h;					 \
11979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {							 \
11989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int ofs = 0;						 \
11999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* blit opaque pixels on one line */			 \
12009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		do {							 \
12019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    unsigned run;					 \
12029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += ((Ctype *)srcbuf)[0];			 \
12039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run = ((Ctype *)srcbuf)[1];				 \
12049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += 2 * sizeof(Ctype);			 \
12059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(run) {						 \
12069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
12079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall				   run, sizeof(Ptype));			 \
12089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			srcbuf += run * sizeof(Ptype);			 \
12099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += run;					 \
12109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    } else if(!ofs)					 \
12119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			goto done;					 \
12129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		} while(ofs < w);					 \
12139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* skip padding if necessary */				 \
12149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(sizeof(Ptype) == 2)					 \
12159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
12169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* blit translucent pixels on the same line */		 \
12179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs = 0;						 \
12189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		do {							 \
12199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    unsigned run;					 \
12209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ofs += ((Uint16 *)srcbuf)[0];			 \
12219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run = ((Uint16 *)srcbuf)[1];			 \
12229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    srcbuf += 4;					 \
12239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    if(run) {						 \
12249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
12259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			unsigned i;					 \
12269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			for(i = 0; i < run; i++) {			 \
12279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    Uint32 src = *(Uint32 *)srcbuf;		 \
12289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    do_blend(src, *dst);			 \
12299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    srcbuf += 4;				 \
12309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    dst++;					 \
12319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			}						 \
12329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			ofs += run;					 \
12339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    }							 \
12349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		} while(ofs < w);					 \
12359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		dstbuf += dst->pitch;					 \
12369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(--linecount);					 \
12379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} while(0)
12389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
12399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	switch(df->BytesPerPixel) {
12409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 2:
12419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
12429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	       || df->Bmask == 0x07e0)
12439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
12449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    else
12459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
12469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
12479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 4:
12489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
12499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
12509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
12519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
12529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
12539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall done:
12549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* Unlock the destination if necessary */
12559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if ( SDL_MUSTLOCK(dst) ) {
12569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	SDL_UnlockSurface(dst);
12579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
12589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return 0;
12599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
12609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
12619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
12629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * Auxiliary functions:
12639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * The encoding functions take 32bpp rgb + a, and
12649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * return the number of bytes copied to the destination.
12659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * The decoding functions copy to 32bpp rgb + a, and
12669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * return the number of bytes copied from the source.
12679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * These are only used in the encoder and un-RLE code and are therefore not
12689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * highly optimised.
12699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */
12709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
12719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
12729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int copy_opaque_16(void *dst, Uint32 *src, int n,
12739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			  SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
12749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
12759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
12769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint16 *d = dst;
12779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
12789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b;
12799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGB_FROM_PIXEL(*src, sfmt, r, g, b);
12809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGB(*d, dfmt, r, g, b);
12819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	src++;
12829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	d++;
12839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
12849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 2;
12859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
12869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
12879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* decode opaque pixels from 16bpp to 32bpp rgb + a */
12889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int uncopy_opaque_16(Uint32 *dst, void *src, int n,
12899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
12909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
12919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
12929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint16 *s = src;
12939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    unsigned alpha = dfmt->Amask ? 255 : 0;
12949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
12959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b;
12969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGB_FROM_PIXEL(*s, sfmt, r, g, b);
12979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
12989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	s++;
12999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst++;
13009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 2;
13029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
13079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int copy_transl_565(void *dst, Uint32 *src, int n,
13089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
13099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
13109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
13119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *d = dst;
13129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
13139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b, a;
13149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint16 pix;
13159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
13169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
13179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	*d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
13189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	src++;
13199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	d++;
13209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 4;
13229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
13259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int copy_transl_555(void *dst, Uint32 *src, int n,
13269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
13279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
13289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
13299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *d = dst;
13309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
13319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b, a;
13329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint16 pix;
13339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
13349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
13359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	*d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
13369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	src++;
13379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	d++;
13389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 4;
13409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
13439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int uncopy_transl_16(Uint32 *dst, void *src, int n,
13449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			    RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
13459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
13469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
13479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *s = src;
13489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
13499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b, a;
13509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 pix = *s++;
13519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	a = (pix & 0x3e0) >> 2;
13529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	pix = (pix & ~0x3e0) | pix >> 16;
13539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGB_FROM_PIXEL(pix, sfmt, r, g, b);
13549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
13559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst++;
13569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 4;
13589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
13619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int copy_32(void *dst, Uint32 *src, int n,
13629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
13639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
13649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
13659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *d = dst;
13669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
13679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b, a;
13689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 pixel;
13699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
13709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGB(pixel, dfmt, r, g, b);
13719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	*d++ = pixel | a << 24;
13729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	src++;
13739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 4;
13759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
13789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int uncopy_32(Uint32 *dst, void *src, int n,
13799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		     RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
13809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
13819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int i;
13829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *s = src;
13839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(i = 0; i < n; i++) {
13849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	unsigned r, g, b, a;
13859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 pixel = *s++;
13869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
13879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	a = pixel >> 24;
13889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
13899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst++;
13909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
13919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return n * 4;
13929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
13939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * True when the alpha field of pixel (extracted with fmt->Amask and
 * fmt->Ashift) equals 255. fmt is a pointer to a format structure; it is
 * parenthesized so that any pointer expression (e.g. &format) is accepted.
 */
#define ISOPAQUE(pixel, fmt) \
    ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)
13959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
/*
 * True when the alpha field of pixel (extracted with fmt->Amask and
 * fmt->Ashift) lies in 1..254, i.e. neither 0 nor 255. Subtracting 1U makes
 * an alpha of 0 wrap to a huge unsigned value, so a single unsigned compare
 * covers both ends of the range. fmt is parenthesized so that any pointer
 * expression (e.g. &format) is accepted.
 */
#define ISTRANSL(pixel, fmt)	\
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
13989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
13999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/* convert surface to be quickly alpha-blittable onto dest, if possible */
14009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int RLEAlphaSurface(SDL_Surface *surface)
14019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
14029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_Surface *dest;
14039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_PixelFormat *df;
14049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int maxsize = 0;
14059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int max_opaque_run;
14069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int max_transl_run = 65535;
14079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    unsigned masksum;
14089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint8 *rlebuf, *dst;
14099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int (*copy_opaque)(void *, Uint32 *, int,
14109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		       SDL_PixelFormat *, SDL_PixelFormat *);
14119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int (*copy_transl)(void *, Uint32 *, int,
14129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		       SDL_PixelFormat *, SDL_PixelFormat *);
14139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    dest = surface->map->dst;
14159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if(!dest)
14169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return -1;
14179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    df = dest->format;
14189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if(surface->format->BitsPerPixel != 32)
14199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return -1;		/* only 32bpp source supported */
14209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* find out whether the destination is one we support,
14229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall       and determine the max size of the encoded result */
14239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    masksum = df->Rmask | df->Gmask | df->Bmask;
14249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    switch(df->BytesPerPixel) {
14259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    case 2:
14269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* 16bpp: only support 565 and 555 formats */
14279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	switch(masksum) {
14289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 0xffff:
14299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(df->Gmask == 0x07e0
14309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	       || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
14319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		copy_opaque = copy_opaque_16;
14329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		copy_transl = copy_transl_565;
14339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else
14349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		return -1;
14359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
14369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 0x7fff:
14379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(df->Gmask == 0x03e0
14389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	       || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
14399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		copy_opaque = copy_opaque_16;
14409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		copy_transl = copy_transl_555;
14419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else
14429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		return -1;
14439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
14449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	default:
14459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    return -1;
14469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
14479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	max_opaque_run = 255;	/* runs stored as bytes */
14489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* worst case is alternating opaque and translucent pixels,
14509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	   with room for alignment padding between lines */
14519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
14529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	break;
14539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    case 4:
14549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(masksum != 0x00ffffff)
14559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    return -1;		/* requires unused high byte */
14569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	copy_opaque = copy_32;
14579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	copy_transl = copy_32;
14589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	max_opaque_run = 255;	/* runs stored as short ints */
14599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* worst case is alternating opaque and translucent pixels */
14619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
14629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	break;
14639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    default:
14649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return -1;		/* anything else unsupported right now */
14659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
14669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    maxsize += sizeof(RLEDestFormat);
14689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    rlebuf = (Uint8 *)SDL_malloc(maxsize);
14699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if(!rlebuf) {
14709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	SDL_OutOfMemory();
14719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return -1;
14729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
14739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    {
14749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* save the destination format so we can undo the encoding later */
14759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	RLEDestFormat *r = (RLEDestFormat *)rlebuf;
14769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->BytesPerPixel = df->BytesPerPixel;
14779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Rloss = df->Rloss;
14789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Gloss = df->Gloss;
14799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Bloss = df->Bloss;
14809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Rshift = df->Rshift;
14819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Gshift = df->Gshift;
14829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Bshift = df->Bshift;
14839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Ashift = df->Ashift;
14849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Rmask = df->Rmask;
14859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Gmask = df->Gmask;
14869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Bmask = df->Bmask;
14879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	r->Amask = df->Amask;
14889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
14899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    dst = rlebuf + sizeof(RLEDestFormat);
14909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* Do the actual encoding */
14929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    {
14939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int x, y;
14949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int h = surface->h, w = surface->w;
14959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	SDL_PixelFormat *sf = surface->format;
14969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 *src = (Uint32 *)surface->pixels;
14979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8 *lastline = dst;	/* end of last non-blank line */
14989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
14999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* opaque counts are 8 or 16 bits, depending on target depth */
15009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ADD_OPAQUE_COUNTS(n, m)			\
15019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(df->BytesPerPixel == 4) {		\
15029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ((Uint16 *)dst)[0] = n;		\
15039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ((Uint16 *)dst)[1] = m;		\
15049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst += 4;				\
15059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} else {				\
15069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst[0] = n;				\
15079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst[1] = m;				\
15089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst += 2;				\
15099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
15109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* translucent counts are always 16 bit */
15129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ADD_TRANSL_COUNTS(n, m)		\
15139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)
15149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	for(y = 0; y < h; y++) {
15169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int runstart, skipstart;
15179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int blankline = 0;
15189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* First encode all opaque pixels of a scan line */
15199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    x = 0;
15209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {
15219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int run, skip, len;
15229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		skipstart = x;
15239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && !ISOPAQUE(src[x], sf))
15249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
15259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart = x;
15269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && ISOPAQUE(src[x], sf))
15279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
15289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		skip = runstart - skipstart;
15299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(skip == w)
15309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    blankline = 1;
15319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = x - runstart;
15329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(skip > max_opaque_run) {
15339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_OPAQUE_COUNTS(max_opaque_run, 0);
15349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    skip -= max_opaque_run;
15359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
15369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		len = MIN(run, max_opaque_run);
15379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ADD_OPAQUE_COUNTS(skip, len);
15389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		dst += copy_opaque(dst, src + runstart, len, sf, df);
15399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart += len;
15409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run -= len;
15419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(run) {
15429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    len = MIN(run, max_opaque_run);
15439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_OPAQUE_COUNTS(0, len);
15449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    dst += copy_opaque(dst, src + runstart, len, sf, df);
15459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    runstart += len;
15469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run -= len;
15479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
15489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(x < w);
15499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* Make sure the next output address is 32-bit aligned */
15519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst += (uintptr_t)dst & 2;
15529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* Next, encode all translucent pixels of the same scan line */
15549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    x = 0;
15559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {
15569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int run, skip, len;
15579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		skipstart = x;
15589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && !ISTRANSL(src[x], sf))
15599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
15609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart = x;
15619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && ISTRANSL(src[x], sf))
15629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
15639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		skip = runstart - skipstart;
15649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		blankline &= (skip == w);
15659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = x - runstart;
15669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(skip > max_transl_run) {
15679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_TRANSL_COUNTS(max_transl_run, 0);
15689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    skip -= max_transl_run;
15699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
15709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		len = MIN(run, max_transl_run);
15719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ADD_TRANSL_COUNTS(skip, len);
15729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		dst += copy_transl(dst, src + runstart, len, sf, df);
15739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart += len;
15749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run -= len;
15759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(run) {
15769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    len = MIN(run, max_transl_run);
15779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_TRANSL_COUNTS(0, len);
15789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    dst += copy_transl(dst, src + runstart, len, sf, df);
15799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    runstart += len;
15809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run -= len;
15819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
15829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(!blankline)
15839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    lastline = dst;
15849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(x < w);
15859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    src += surface->pitch >> 2;
15879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
15889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst = lastline;		/* back up past trailing blank lines */
15899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	ADD_OPAQUE_COUNTS(0, 0);
15909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
15919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ADD_OPAQUE_COUNTS
15939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ADD_TRANSL_COUNTS
15949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
15959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* Now that we have it encoded, release the original pixels */
15969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC
15979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall       && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) {
15989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	SDL_free( surface->pixels );
15999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	surface->pixels = NULL;
16009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
16019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* realloc the buffer to release unused memory */
16039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    {
16049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
16059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(!p)
16069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    p = rlebuf;
16079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	surface->map->sw_data->aux_data = p;
16089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
16099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return 0;
16119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
16129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic Uint32 getpix_8(Uint8 *srcbuf)
16149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
16159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return *srcbuf;
16169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
16179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic Uint32 getpix_16(Uint8 *srcbuf)
16199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
16209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return *(Uint16 *)srcbuf;
16219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
16229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic Uint32 getpix_24(Uint8 *srcbuf)
16249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
16259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#if SDL_BYTEORDER == SDL_LIL_ENDIAN
16269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
16279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#else
16289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
16299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#endif
16309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
16319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic Uint32 getpix_32(Uint8 *srcbuf)
16339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
16349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return *(Uint32 *)srcbuf;
16359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
16369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Halltypedef Uint32 (*getpix_func)(Uint8 *);
16389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic getpix_func getpixes[4] = {
16409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    getpix_8, getpix_16, getpix_24, getpix_32
16419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall};
16429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic int RLEColorkeySurface(SDL_Surface *surface)
16449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
16459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall        Uint8 *rlebuf, *dst;
16469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int maxn;
16479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int y;
16489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint8 *srcbuf, *lastline;
16499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int maxsize = 0;
16509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int bpp = surface->format->BytesPerPixel;
16519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	getpix_func getpix;
16529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	Uint32 ckey, rgbmask;
16539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int w, h;
16549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* calculate the worst case size for the compressed surface */
16569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	switch(bpp) {
16579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 1:
16589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* worst case is alternating opaque and transparent pixels,
16599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	       starting with an opaque pixel */
16609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
16619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
16629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 2:
16639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 3:
16649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* worst case is solid runs, at most 255 pixels wide */
16659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    maxsize = surface->h * (2 * (surface->w / 255 + 1)
16669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall				    + surface->w * bpp) + 2;
16679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
16689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	case 4:
16699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* worst case is solid runs, at most 65535 pixels wide */
16709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    maxsize = surface->h * (4 * (surface->w / 65535 + 1)
16719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall				    + surface->w * 4) + 4;
16729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    break;
16739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
16749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	rlebuf = (Uint8 *)SDL_malloc(maxsize);
16769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( rlebuf == NULL ) {
16779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_OutOfMemory();
16789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		return(-1);
16799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
16809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Set up the conversion */
16829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	srcbuf = (Uint8 *)surface->pixels;
16839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	maxn = bpp == 4 ? 65535 : 255;
16849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst = rlebuf;
16859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	rgbmask = ~surface->format->Amask;
16869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	ckey = surface->format->colorkey & rgbmask;
16879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	lastline = dst;
16889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	getpix = getpixes[bpp - 1];
16899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	w = surface->w;
16909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	h = surface->h;
16919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
16929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#define ADD_COUNTS(n, m)			\
16939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(bpp == 4) {				\
16949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ((Uint16 *)dst)[0] = n;		\
16959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ((Uint16 *)dst)[1] = m;		\
16969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst += 4;				\
16979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} else {				\
16989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst[0] = n;				\
16999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst[1] = m;				\
17009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    dst += 2;				\
17019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	for(y = 0; y < h; y++) {
17049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int x = 0;
17059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    int blankline = 0;
17069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    do {
17079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int run, skip, len;
17089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int runstart;
17099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		int skipstart = x;
17109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* find run of transparent, then opaque pixels */
17129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
17139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
17149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart = x;
17159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
17169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    x++;
17179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		skip = runstart - skipstart;
17189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(skip == w)
17199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    blankline = 1;
17209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = x - runstart;
17219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* encode segment */
17239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(skip > maxn) {
17249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_COUNTS(maxn, 0);
17259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    skip -= maxn;
17269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
17279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		len = MIN(run, maxn);
17289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ADD_COUNTS(skip, len);
17299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
17309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		dst += len * bpp;
17319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run -= len;
17329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		runstart += len;
17339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		while(run) {
17349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    len = MIN(run, maxn);
17359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    ADD_COUNTS(0, len);
17369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
17379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    dst += len * bpp;
17389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    runstart += len;
17399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    run -= len;
17409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
17419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if(!blankline)
17429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    lastline = dst;
17439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } while(x < w);
17449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    srcbuf += surface->pitch;
17469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst = lastline;		/* back up bast trailing blank lines */
17489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	ADD_COUNTS(0, 0);
17499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall#undef ADD_COUNTS
17519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Now that we have it encoded, release the original pixels */
17539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC
17549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	   && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) {
17559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    SDL_free( surface->pixels );
17569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    surface->pixels = NULL;
17579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* realloc the buffer to release unused memory */
17609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	{
17619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    /* If realloc returns NULL, the original block is left intact */
17629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
17639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(!p)
17649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		p = rlebuf;
17659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    surface->map->sw_data->aux_data = p;
17669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return(0);
17699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
17709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallint SDL_RLESurface(SDL_Surface *surface)
17729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
17739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int retcode;
17749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Clear any previous RLE conversion */
17769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) {
17779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_UnRLESurface(surface, 1);
17789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* We don't support RLE encoding of bitmaps */
17819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( surface->format->BitsPerPixel < 8 ) {
17829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		return(-1);
17839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Lock the surface if it's in hardware */
17869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( SDL_MUSTLOCK(surface) ) {
17879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if ( SDL_LockSurface(surface) < 0 ) {
17889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			return(-1);
17899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
17909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
17919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
17929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Encode */
17939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) {
17949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    retcode = RLEColorkeySurface(surface);
17959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} else {
17969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA
17979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	       && surface->format->Amask != 0)
17989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		retcode = RLEAlphaSurface(surface);
17999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    else
18009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		retcode = -1;	/* no RLE for per-surface alpha sans ckey */
18019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
18029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* Unlock the surface if it's in hardware */
18049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( SDL_MUSTLOCK(surface) ) {
18059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_UnlockSurface(surface);
18069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
18079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(retcode < 0)
18099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    return -1;
18109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* The surface is now accelerated */
18129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	surface->flags |= SDL_RLEACCEL;
18139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	return(0);
18159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
18169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall/*
18189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * Un-RLE a surface with pixel alpha
18199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * This may not give back exactly the image before RLE-encoding; all
18209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * completely transparent pixels will be lost, and colour and alpha depth
18219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall * may have been reduced (when encoding for 16bpp targets).
18229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall */
18239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallstatic SDL_bool UnRLEAlpha(SDL_Surface *surface)
18249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
18259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint8 *srcbuf;
18269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    Uint32 *dst;
18279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_PixelFormat *sf = surface->format;
18289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    RLEDestFormat *df = surface->map->sw_data->aux_data;
18299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int (*uncopy_opaque)(Uint32 *, void *, int,
18309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			 RLEDestFormat *, SDL_PixelFormat *);
18319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int (*uncopy_transl)(Uint32 *, void *, int,
18329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			 RLEDestFormat *, SDL_PixelFormat *);
18339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int w = surface->w;
18349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    int bpp = df->BytesPerPixel;
18359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if(bpp == 2) {
18379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	uncopy_opaque = uncopy_opaque_16;
18389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	uncopy_transl = uncopy_transl_16;
18399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    } else {
18409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	uncopy_opaque = uncopy_transl = uncopy_32;
18419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
18429682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18439682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    surface->pixels = SDL_malloc(surface->h * surface->pitch);
18449682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if ( !surface->pixels ) {
18459682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall        return(SDL_FALSE);
18469682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
18479682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* fill background with transparent pixels */
18489682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
18499682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18509682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    dst = surface->pixels;
18519682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    srcbuf = (Uint8 *)(df + 1);
18529682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    for(;;) {
18539682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* copy opaque pixels */
18549682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	int ofs = 0;
18559682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	do {
18569682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    unsigned run;
18579682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(bpp == 2) {
18589682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += srcbuf[0];
18599682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = srcbuf[1];
18609682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += 2;
18619682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else {
18629682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += ((Uint16 *)srcbuf)[0];
18639682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		run = ((Uint16 *)srcbuf)[1];
18649682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += 4;
18659682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }
18669682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(run) {
18679682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
18689682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += run;
18699682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else if(!ofs)
18709682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		return(SDL_TRUE);
18719682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} while(ofs < w);
18729682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18739682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* skip padding if needed */
18749682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(bpp == 2)
18759682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    srcbuf += (uintptr_t)srcbuf & 2;
18769682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18779682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	/* copy translucent pixels */
18789682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	ofs = 0;
18799682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	do {
18809682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    unsigned run;
18819682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    ofs += ((Uint16 *)srcbuf)[0];
18829682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    run = ((Uint16 *)srcbuf)[1];
18839682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    srcbuf += 4;
18849682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if(run) {
18859682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
18869682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		ofs += run;
18879682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }
18889682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	} while(ofs < w);
18899682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	dst += surface->pitch >> 2;
18909682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
18919682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    /* Make the compiler happy */
18929682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    return(SDL_TRUE);
18939682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
18949682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
18959682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hallvoid SDL_UnRLESurface(SDL_Surface *surface, int recode)
18969682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall{
18979682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) {
18989682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	surface->flags &= ~SDL_RLEACCEL;
18999682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19009682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if(recode && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC
19019682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	   && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) {
19029682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) {
19039682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_Rect full;
19049682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		unsigned alpha_flag;
19059682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19069682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* re-create the original surface */
19079682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		surface->pixels = SDL_malloc(surface->h * surface->pitch);
19089682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if ( !surface->pixels ) {
19099682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			/* Oh crap... */
19109682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			surface->flags |= SDL_RLEACCEL;
19119682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall			return;
19129682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
19139682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19149682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* fill it with the background colour */
19159682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_FillRect(surface, NULL, surface->format->colorkey);
19169682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19179682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		/* now render the encoded surface */
19189682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		full.x = full.y = 0;
19199682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		full.w = surface->w;
19209682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		full.h = surface->h;
19219682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		alpha_flag = surface->flags & SDL_SRCALPHA;
19229682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		surface->flags &= ~SDL_SRCALPHA; /* opaque blit */
19239682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		SDL_RLEBlit(surface, &full, surface, &full);
19249682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		surface->flags |= alpha_flag;
19259682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    } else {
19269682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		if ( !UnRLEAlpha(surface) ) {
19279682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    /* Oh crap... */
19289682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    surface->flags |= SDL_RLEACCEL;
19299682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		    return;
19309682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall		}
19319682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    }
19329682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
19339682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19349682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	if ( surface->map && surface->map->sw_data->aux_data ) {
19359682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    SDL_free(surface->map->sw_data->aux_data);
19369682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	    surface->map->sw_data->aux_data = NULL;
19379682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall	}
19389682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall    }
19399682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall}
19409682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
19419682c8870b8ff5e4ac2e4c70b759f791c6f38c1fJesse Hall
1942