1/*
2    SDL - Simple DirectMedia Layer
3    Copyright (C) 1997-2012 Sam Lantinga
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19    Sam Lantinga
20    slouken@libsdl.org
21*/
22#include "SDL_config.h"
23
24#include "SDL_video.h"
25#include "SDL_endian.h"
26#include "SDL_cpuinfo.h"
27#include "SDL_blit.h"
28
29/* Functions to blit from N-bit surfaces to other surfaces */
30
31#if SDL_ALTIVEC_BLITTERS
32#if __MWERKS__
33#pragma altivec_model on
34#endif
35#ifdef HAVE_ALTIVEC_H
36#include <altivec.h>
37#endif
38#define assert(X)
39#ifdef __MACOSX__
40#include <sys/sysctl.h>
/*
 * Query the "hw.l3cachesize" sysctl.
 * Returns the value the kernel reports, or 0 when the query fails.
 */
static size_t GetL3CacheSize( void )
{
    u_int64_t cachesize = 0;
    size_t len = sizeof( cachesize );

    if( sysctlbyname( "hw.l3cachesize", &cachesize, &len, NULL, 0 ) != 0 ) {
        return 0;
    }
    return cachesize;
}
53#else
/*
 * No way to ask the hardware here: report a fixed 2 MB (2097152 bytes)
 * L3 cache.
 */
static size_t GetL3CacheSize( void )
{
    /* XXX: Just guess G4 */
    return 2 * 1024 * 1024;
}
59#endif /* __MACOSX__ */
60
/*
 * Vector literal constructors. Everything except Mac OS X with a
 * pre-4.0 GCC gets the brace-initialiser spelling; that one combination
 * gets the parenthesised spelling.
 */
#if !(defined(__MACOSX__) && (__GNUC__ < 4))
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#else
    #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
    #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#endif
72
/*
 * Low four address bits of x: non-zero when x is not on a 16-byte
 * boundary. The argument is parenthesised so that an expression such as
 * UNALIGNED_PTR(p + 1) converts the resulting pointer, not just p.
 */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * 16-byte permute vector that applies the same byte order (a,b,c,d) to
 * each of the four 32-bit elements.
 */
#define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
                               ( 0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                                 0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                                 0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                                 0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d) )

/*
 * Assemble a 32-bit pixel for dstfmt from 8-bit components.
 * Each component is widened to Uint32 before shifting: an int-promoted
 * Uint8 shifted left by 24 would overflow a signed int.
 */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( ((((Uint32)(r))<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
      ((((Uint32)(g))<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
      ((((Uint32)(b))<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
      ((((Uint32)(a))<<(dstfmt)->Ashift)&(dstfmt)->Amask) )
85
/*
 * Data Stream Touch: AltiVec cache prefetching.
 *
 *  The speed boost is very significant on a G4, but do not use this
 *   on a G5.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* Combine size, count and stride into one DST control word. */
#define DST_CTRL(size, count, stride) \
    ((stride) | ((count) << 16) | ((size) << 24))

/*
 * Permute control used to realign data loaded from src.
 * Misaligned src: vec_lvsl(0, src). Aligned src: vec_lvsl(8, src) with 8
 * added to every byte (presumably so that only the second vec_perm
 * operand is selected -- confirm against the AltiVec manual).
 */
#define VEC_ALIGNER(src) (UNALIGNED_PTR(src) \
    ? vec_lvsl(0, src) \
    : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
102
103/* Calculate the permute vector used for 32->32 swizzling */
104static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
105                                  const SDL_PixelFormat *dstfmt)
106{
107    /*
108    * We have to assume that the bits that aren't used by other
109     *  colors is alpha, and it's one complete byte, since some formats
110     *  leave alpha with a zero mask, but we should still swizzle the bits.
111     */
112    /* ARGB */
113    const static struct SDL_PixelFormat default_pixel_format = {
114        NULL, 0, 0,
115        0, 0, 0, 0,
116        16, 8, 0, 24,
117        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
118        0, 0};
119    if (!srcfmt) {
120        srcfmt = &default_pixel_format;
121    }
122    if (!dstfmt) {
123        dstfmt = &default_pixel_format;
124    }
125    const vector unsigned char plus = VECUINT8_LITERAL(
126                                      0x00, 0x00, 0x00, 0x00,
127                                      0x04, 0x04, 0x04, 0x04,
128                                      0x08, 0x08, 0x08, 0x08,
129                                      0x0C, 0x0C, 0x0C, 0x0C );
130    vector unsigned char vswiz;
131    vector unsigned int srcvec;
132#define RESHIFT(X) (3 - ((X) >> 3))
133    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
134    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
135    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
136    Uint32 amask;
137    /* Use zero for alpha if either surface doesn't have alpha */
138    if (dstfmt->Amask) {
139        amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
140    } else {
141        amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
142    }
143#undef RESHIFT
144    ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
145    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
146    return(vswiz);
147}
148
149static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
150static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
151    int height = info->d_height;
152    Uint8 *src = (Uint8 *) info->s_pixels;
153    int srcskip = info->s_skip;
154    Uint8 *dst = (Uint8 *) info->d_pixels;
155    int dstskip = info->d_skip;
156    SDL_PixelFormat *srcfmt = info->src;
157    vector unsigned char valpha = vec_splat_u8(0);
158    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
159    vector unsigned char vgmerge = VECUINT8_LITERAL(
160        0x00, 0x02, 0x00, 0x06,
161        0x00, 0x0a, 0x00, 0x0e,
162        0x00, 0x12, 0x00, 0x16,
163        0x00, 0x1a, 0x00, 0x1e);
164    vector unsigned short v1 = vec_splat_u16(1);
165    vector unsigned short v3 = vec_splat_u16(3);
166    vector unsigned short v3f = VECUINT16_LITERAL(
167        0x003f, 0x003f, 0x003f, 0x003f,
168        0x003f, 0x003f, 0x003f, 0x003f);
169    vector unsigned short vfc = VECUINT16_LITERAL(
170        0x00fc, 0x00fc, 0x00fc, 0x00fc,
171        0x00fc, 0x00fc, 0x00fc, 0x00fc);
172    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
173    vf800 = vec_sl(vf800, vec_splat_u16(8));
174
175    while (height--) {
176        vector unsigned char valigner;
177        vector unsigned char voverflow;
178        vector unsigned char vsrc;
179
180        int width = info->d_width;
181        int extrawidth;
182
183        /* do scalar until we can align... */
184#define ONE_PIXEL_BLEND(condition, widthvar) \
185        while (condition) { \
186            Uint32 Pixel; \
187            unsigned sR, sG, sB, sA; \
188            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
189                          sR, sG, sB, sA); \
190            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
191                                ((sG << 3) & 0x000007E0) | \
192                                ((sB >> 3) & 0x0000001F)); \
193            dst += 2; \
194            src += 4; \
195            widthvar--; \
196        }
197
198        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
199
200        /* After all that work, here's the vector part! */
201        extrawidth = (width % 8);  /* trailing unaligned stores */
202        width -= extrawidth;
203        vsrc = vec_ld(0, src);
204        valigner = VEC_ALIGNER(src);
205
206        while (width) {
207            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
208            vector unsigned int vsrc1, vsrc2;
209            vector unsigned char vdst;
210
211            voverflow = vec_ld(15, src);
212            vsrc = vec_perm(vsrc, voverflow, valigner);
213            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
214            src += 16;
215            vsrc = voverflow;
216            voverflow = vec_ld(15, src);
217            vsrc = vec_perm(vsrc, voverflow, valigner);
218            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
219            /* 1555 */
220            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
221            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
222            vgpixel = vec_and(vgpixel, vfc);
223            vgpixel = vec_sl(vgpixel, v3);
224            vrpixel = vec_sl(vpixel, v1);
225            vrpixel = vec_and(vrpixel, vf800);
226            vbpixel = vec_and(vpixel, v3f);
227            vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
228            /* 565 */
229            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
230            vec_st(vdst, 0, dst);
231
232            width -= 8;
233            src += 16;
234            dst += 16;
235            vsrc = voverflow;
236        }
237
238        assert(width == 0);
239
240        /* do scalar until we can align... */
241        ONE_PIXEL_BLEND((extrawidth), extrawidth);
242#undef ONE_PIXEL_BLEND
243
244        src += srcskip;  /* move to next row, accounting for pitch. */
245        dst += dstskip;
246    }
247
248
249}
250
251static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
252    int height = info->d_height;
253    Uint8 *src = (Uint8 *) info->s_pixels;
254    int srcskip = info->s_skip;
255    Uint8 *dst = (Uint8 *) info->d_pixels;
256    int dstskip = info->d_skip;
257    SDL_PixelFormat *srcfmt = info->src;
258    SDL_PixelFormat *dstfmt = info->dst;
259    unsigned alpha;
260    vector unsigned char valpha;
261    vector unsigned char vpermute;
262    vector unsigned short vf800;
263    vector unsigned int v8 = vec_splat_u32(8);
264    vector unsigned int v16 = vec_add(v8, v8);
265    vector unsigned short v2 = vec_splat_u16(2);
266    vector unsigned short v3 = vec_splat_u16(3);
267    /*
268        0x10 - 0x1f is the alpha
269        0x00 - 0x0e evens are the red
270        0x01 - 0x0f odds are zero
271    */
272    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
273        0x10, 0x00, 0x01, 0x01,
274        0x10, 0x02, 0x01, 0x01,
275        0x10, 0x04, 0x01, 0x01,
276        0x10, 0x06, 0x01, 0x01
277    );
278    vector unsigned char vredalpha2 = (vector unsigned char) (
279        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
280    );
281    /*
282        0x00 - 0x0f is ARxx ARxx ARxx ARxx
283        0x11 - 0x0f odds are blue
284    */
285    vector unsigned char vblue1 = VECUINT8_LITERAL(
286        0x00, 0x01, 0x02, 0x11,
287        0x04, 0x05, 0x06, 0x13,
288        0x08, 0x09, 0x0a, 0x15,
289        0x0c, 0x0d, 0x0e, 0x17
290    );
291    vector unsigned char vblue2 = (vector unsigned char)(
292        vec_add((vector unsigned int)vblue1, v8)
293    );
294    /*
295        0x00 - 0x0f is ARxB ARxB ARxB ARxB
296        0x10 - 0x0e evens are green
297    */
298    vector unsigned char vgreen1 = VECUINT8_LITERAL(
299        0x00, 0x01, 0x10, 0x03,
300        0x04, 0x05, 0x12, 0x07,
301        0x08, 0x09, 0x14, 0x0b,
302        0x0c, 0x0d, 0x16, 0x0f
303    );
304    vector unsigned char vgreen2 = (vector unsigned char)(
305        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
306    );
307
308
309    assert(srcfmt->BytesPerPixel == 2);
310    assert(dstfmt->BytesPerPixel == 4);
311
312    vf800 = (vector unsigned short)vec_splat_u8(-7);
313    vf800 = vec_sl(vf800, vec_splat_u16(8));
314
315    if (dstfmt->Amask && srcfmt->alpha) {
316        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
317        valpha = vec_splat(valpha, 0);
318    } else {
319        alpha = 0;
320        valpha = vec_splat_u8(0);
321    }
322
323    vpermute = calc_swizzle32(NULL, dstfmt);
324    while (height--) {
325        vector unsigned char valigner;
326        vector unsigned char voverflow;
327        vector unsigned char vsrc;
328
329        int width = info->d_width;
330        int extrawidth;
331
332        /* do scalar until we can align... */
333#define ONE_PIXEL_BLEND(condition, widthvar) \
334        while (condition) { \
335            unsigned sR, sG, sB; \
336            unsigned short Pixel = *((unsigned short *)src); \
337            sR = (Pixel >> 8) & 0xf8; \
338            sG = (Pixel >> 3) & 0xfc; \
339            sB = (Pixel << 3) & 0xf8; \
340            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
341            src += 2; \
342            dst += 4; \
343            widthvar--; \
344        }
345        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
346
347        /* After all that work, here's the vector part! */
348        extrawidth = (width % 8);  /* trailing unaligned stores */
349        width -= extrawidth;
350        vsrc = vec_ld(0, src);
351        valigner = VEC_ALIGNER(src);
352
353        while (width) {
354            vector unsigned short vR, vG, vB;
355            vector unsigned char vdst1, vdst2;
356
357            voverflow = vec_ld(15, src);
358            vsrc = vec_perm(vsrc, voverflow, valigner);
359
360            vR = vec_and((vector unsigned short)vsrc, vf800);
361            vB = vec_sl((vector unsigned short)vsrc, v3);
362            vG = vec_sl(vB, v2);
363
364            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
365            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
366            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
367            vdst1 = vec_perm(vdst1, valpha, vpermute);
368            vec_st(vdst1, 0, dst);
369
370            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
371            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
372            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
373            vdst2 = vec_perm(vdst2, valpha, vpermute);
374            vec_st(vdst2, 16, dst);
375
376            width -= 8;
377            dst += 32;
378            src += 16;
379            vsrc = voverflow;
380        }
381
382        assert(width == 0);
383
384
385        /* do scalar until we can align... */
386        ONE_PIXEL_BLEND((extrawidth), extrawidth);
387#undef ONE_PIXEL_BLEND
388
389        src += srcskip;  /* move to next row, accounting for pitch. */
390        dst += dstskip;
391    }
392
393}
394
395
396static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
397    int height = info->d_height;
398    Uint8 *src = (Uint8 *) info->s_pixels;
399    int srcskip = info->s_skip;
400    Uint8 *dst = (Uint8 *) info->d_pixels;
401    int dstskip = info->d_skip;
402    SDL_PixelFormat *srcfmt = info->src;
403    SDL_PixelFormat *dstfmt = info->dst;
404    unsigned alpha;
405    vector unsigned char valpha;
406    vector unsigned char vpermute;
407    vector unsigned short vf800;
408    vector unsigned int v8 = vec_splat_u32(8);
409    vector unsigned int v16 = vec_add(v8, v8);
410    vector unsigned short v1 = vec_splat_u16(1);
411    vector unsigned short v3 = vec_splat_u16(3);
412    /*
413        0x10 - 0x1f is the alpha
414        0x00 - 0x0e evens are the red
415        0x01 - 0x0f odds are zero
416    */
417    vector unsigned char vredalpha1 = VECUINT8_LITERAL(
418        0x10, 0x00, 0x01, 0x01,
419        0x10, 0x02, 0x01, 0x01,
420        0x10, 0x04, 0x01, 0x01,
421        0x10, 0x06, 0x01, 0x01
422    );
423    vector unsigned char vredalpha2 = (vector unsigned char)(
424        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
425    );
426    /*
427        0x00 - 0x0f is ARxx ARxx ARxx ARxx
428        0x11 - 0x0f odds are blue
429    */
430    vector unsigned char vblue1 = VECUINT8_LITERAL(
431        0x00, 0x01, 0x02, 0x11,
432        0x04, 0x05, 0x06, 0x13,
433        0x08, 0x09, 0x0a, 0x15,
434        0x0c, 0x0d, 0x0e, 0x17
435    );
436    vector unsigned char vblue2 = (vector unsigned char)(
437        vec_add((vector unsigned int)vblue1, v8)
438    );
439    /*
440        0x00 - 0x0f is ARxB ARxB ARxB ARxB
441        0x10 - 0x0e evens are green
442    */
443    vector unsigned char vgreen1 = VECUINT8_LITERAL(
444        0x00, 0x01, 0x10, 0x03,
445        0x04, 0x05, 0x12, 0x07,
446        0x08, 0x09, 0x14, 0x0b,
447        0x0c, 0x0d, 0x16, 0x0f
448    );
449    vector unsigned char vgreen2 = (vector unsigned char)(
450        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
451    );
452
453
454    assert(srcfmt->BytesPerPixel == 2);
455    assert(dstfmt->BytesPerPixel == 4);
456
457    vf800 = (vector unsigned short)vec_splat_u8(-7);
458    vf800 = vec_sl(vf800, vec_splat_u16(8));
459
460    if (dstfmt->Amask && srcfmt->alpha) {
461        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
462        valpha = vec_splat(valpha, 0);
463    } else {
464        alpha = 0;
465        valpha = vec_splat_u8(0);
466    }
467
468    vpermute = calc_swizzle32(NULL, dstfmt);
469    while (height--) {
470        vector unsigned char valigner;
471        vector unsigned char voverflow;
472        vector unsigned char vsrc;
473
474        int width = info->d_width;
475        int extrawidth;
476
477        /* do scalar until we can align... */
478#define ONE_PIXEL_BLEND(condition, widthvar) \
479        while (condition) { \
480            unsigned sR, sG, sB; \
481            unsigned short Pixel = *((unsigned short *)src); \
482            sR = (Pixel >> 7) & 0xf8; \
483            sG = (Pixel >> 2) & 0xf8; \
484            sB = (Pixel << 3) & 0xf8; \
485            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
486            src += 2; \
487            dst += 4; \
488            widthvar--; \
489        }
490        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
491
492        /* After all that work, here's the vector part! */
493        extrawidth = (width % 8);  /* trailing unaligned stores */
494        width -= extrawidth;
495        vsrc = vec_ld(0, src);
496        valigner = VEC_ALIGNER(src);
497
498        while (width) {
499            vector unsigned short vR, vG, vB;
500            vector unsigned char vdst1, vdst2;
501
502            voverflow = vec_ld(15, src);
503            vsrc = vec_perm(vsrc, voverflow, valigner);
504
505            vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
506            vB = vec_sl((vector unsigned short)vsrc, v3);
507            vG = vec_sl(vB, v3);
508
509            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
510            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
511            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
512            vdst1 = vec_perm(vdst1, valpha, vpermute);
513            vec_st(vdst1, 0, dst);
514
515            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
516            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
517            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
518            vdst2 = vec_perm(vdst2, valpha, vpermute);
519            vec_st(vdst2, 16, dst);
520
521            width -= 8;
522            dst += 32;
523            src += 16;
524            vsrc = voverflow;
525        }
526
527        assert(width == 0);
528
529
530        /* do scalar until we can align... */
531        ONE_PIXEL_BLEND((extrawidth), extrawidth);
532#undef ONE_PIXEL_BLEND
533
534        src += srcskip;  /* move to next row, accounting for pitch. */
535        dst += dstskip;
536    }
537
538}
539
540static void BlitNtoNKey(SDL_BlitInfo *info);
541static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
542static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
543{
544    int height = info->d_height;
545    Uint32 *srcp = (Uint32 *) info->s_pixels;
546    int srcskip = info->s_skip;
547    Uint32 *dstp = (Uint32 *) info->d_pixels;
548    int dstskip = info->d_skip;
549    SDL_PixelFormat *srcfmt = info->src;
550    int srcbpp = srcfmt->BytesPerPixel;
551    SDL_PixelFormat *dstfmt = info->dst;
552    int dstbpp = dstfmt->BytesPerPixel;
553    int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
554	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
555    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
556	Uint32 ckey = info->src->colorkey;
557    vector unsigned int valpha;
558    vector unsigned char vpermute;
559    vector unsigned char vzero;
560    vector unsigned int vckey;
561    vector unsigned int vrgbmask;
562    vpermute = calc_swizzle32(srcfmt, dstfmt);
563    if (info->d_width < 16) {
564        if(copy_alpha) {
565            BlitNtoNKeyCopyAlpha(info);
566        } else {
567            BlitNtoNKey(info);
568        }
569        return;
570    }
571    vzero = vec_splat_u8(0);
572    if (alpha) {
573        ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
574        valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
575    } else {
576        valpha = (vector unsigned int)vzero;
577    }
578    ckey &= rgbmask;
579    ((unsigned int *)(char*)&vckey)[0] = ckey;
580    vckey = vec_splat(vckey, 0);
581    ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
582    vrgbmask = vec_splat(vrgbmask, 0);
583
584    while (height--) {
585#define ONE_PIXEL_BLEND(condition, widthvar) \
586        if (copy_alpha) { \
587            while (condition) { \
588                Uint32 Pixel; \
589                unsigned sR, sG, sB, sA; \
590                DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
591                          sR, sG, sB, sA); \
592                if ( (Pixel & rgbmask) != ckey ) { \
593                      ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
594                            sR, sG, sB, sA); \
595                } \
596                dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
597                srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
598                widthvar--; \
599            } \
600        } else { \
601            while (condition) { \
602                Uint32 Pixel; \
603                unsigned sR, sG, sB; \
604                RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
605                if ( Pixel != ckey ) { \
606                    RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
607                    ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
608                              sR, sG, sB, alpha); \
609                } \
610                dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
611                srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
612                widthvar--; \
613            } \
614        }
615        int width = info->d_width;
616        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
617        assert(width > 0);
618        if (width > 0) {
619            int extrawidth = (width % 4);
620            vector unsigned char valigner = VEC_ALIGNER(srcp);
621            vector unsigned int vs = vec_ld(0, srcp);
622            width -= extrawidth;
623            assert(width >= 4);
624            while (width) {
625                vector unsigned char vsel;
626                vector unsigned int vd;
627                vector unsigned int voverflow = vec_ld(15, srcp);
628                /* load the source vec */
629                vs = vec_perm(vs, voverflow, valigner);
630                /* vsel is set for items that match the key */
631                vsel = (vector unsigned char)vec_and(vs, vrgbmask);
632                vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
633                /* permute the src vec to the dest format */
634                vs = vec_perm(vs, valpha, vpermute);
635                /* load the destination vec */
636                vd = vec_ld(0, dstp);
637                /* select the source and dest into vs */
638                vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
639
640                vec_st(vd, 0, dstp);
641                srcp += 4;
642                width -= 4;
643                dstp += 4;
644                vs = voverflow;
645            }
646            ONE_PIXEL_BLEND((extrawidth), extrawidth);
647#undef ONE_PIXEL_BLEND
648            srcp += srcskip >> 2;
649            dstp += dstskip >> 2;
650        }
651    }
652}
653
654/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
655/* Use this on a G5 */
656static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
657{
658    int height = info->d_height;
659    Uint32 *src = (Uint32 *) info->s_pixels;
660    int srcskip = info->s_skip;
661    Uint32 *dst = (Uint32 *) info->d_pixels;
662    int dstskip = info->d_skip;
663    SDL_PixelFormat *srcfmt = info->src;
664    SDL_PixelFormat *dstfmt = info->dst;
665    vector unsigned int vzero = vec_splat_u32(0);
666    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
667    if (dstfmt->Amask && !srcfmt->Amask) {
668        if (srcfmt->alpha) {
669            vector unsigned char valpha;
670            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
671            vzero = (vector unsigned int)vec_splat(valpha, 0);
672        }
673    }
674
675    assert(srcfmt->BytesPerPixel == 4);
676    assert(dstfmt->BytesPerPixel == 4);
677
678    while (height--) {
679        vector unsigned char valigner;
680        vector unsigned int vbits;
681        vector unsigned int voverflow;
682        Uint32 bits;
683        Uint8 r, g, b, a;
684
685        int width = info->d_width;
686        int extrawidth;
687
688        /* do scalar until we can align... */
689        while ((UNALIGNED_PTR(dst)) && (width)) {
690            bits = *(src++);
691            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
692            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
693            width--;
694        }
695
696        /* After all that work, here's the vector part! */
697        extrawidth = (width % 4);
698        width -= extrawidth;
699        valigner = VEC_ALIGNER(src);
700        vbits = vec_ld(0, src);
701
702       while (width) {
703            voverflow = vec_ld(15, src);
704            src += 4;
705            width -= 4;
706            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
707            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
708            vec_st(vbits, 0, dst);  /* store it back out. */
709            dst += 4;
710            vbits = voverflow;
711        }
712
713        assert(width == 0);
714
715        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
716        while (extrawidth) {
717            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
718            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
719            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
720            extrawidth--;
721        }
722
723        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
724        dst += dstskip >> 2;
725    }
726
727}
728
729/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
730/* Use this on a G4 */
731static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
732{
733    const int scalar_dst_lead = sizeof (Uint32) * 4;
734    const int vector_dst_lead = sizeof (Uint32) * 16;
735
736    int height = info->d_height;
737    Uint32 *src = (Uint32 *) info->s_pixels;
738    int srcskip = info->s_skip;
739    Uint32 *dst = (Uint32 *) info->d_pixels;
740    int dstskip = info->d_skip;
741    SDL_PixelFormat *srcfmt = info->src;
742    SDL_PixelFormat *dstfmt = info->dst;
743    vector unsigned int vzero = vec_splat_u32(0);
744    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
745    if (dstfmt->Amask && !srcfmt->Amask) {
746        if (srcfmt->alpha) {
747            vector unsigned char valpha;
748            ((unsigned char *)&valpha)[0] = srcfmt->alpha;
749            vzero = (vector unsigned int)vec_splat(valpha, 0);
750        }
751    }
752
753    assert(srcfmt->BytesPerPixel == 4);
754    assert(dstfmt->BytesPerPixel == 4);
755
756    while (height--) {
757        vector unsigned char valigner;
758        vector unsigned int vbits;
759        vector unsigned int voverflow;
760        Uint32 bits;
761        Uint8 r, g, b, a;
762
763        int width = info->d_width;
764        int extrawidth;
765
766        /* do scalar until we can align... */
767        while ((UNALIGNED_PTR(dst)) && (width)) {
768            vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
769            vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
770            bits = *(src++);
771            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
772            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
773            width--;
774        }
775
776        /* After all that work, here's the vector part! */
777        extrawidth = (width % 4);
778        width -= extrawidth;
779        valigner = VEC_ALIGNER(src);
780        vbits = vec_ld(0, src);
781
782        while (width) {
783            vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
784            vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
785            voverflow = vec_ld(15, src);
786            src += 4;
787            width -= 4;
788            vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
789            vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
790            vec_st(vbits, 0, dst);  /* store it back out. */
791            dst += 4;
792            vbits = voverflow;
793        }
794
795        assert(width == 0);
796
797        /* cover pixels at the end of the row that didn't fit in 16 bytes. */
798        while (extrawidth) {
799            bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
800            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
801            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
802            extrawidth--;
803        }
804
805        src += srcskip >> 2;  /* move to next row, accounting for pitch. */
806        dst += dstskip >> 2;
807    }
808
809    vec_dss(DST_CHAN_SRC);
810    vec_dss(DST_CHAN_DEST);
811}
812
813static Uint32 GetBlitFeatures( void )
814{
815    static Uint32 features = 0xffffffff;
816    if (features == 0xffffffff) {
817        /* Provide an override for testing .. */
818        char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
819        if (override) {
820            features = 0;
821            SDL_sscanf(override, "%u", &features);
822        } else {
823            features = ( 0
824                /* Feature 1 is has-MMX */
825                | ((SDL_HasMMX()) ? 1 : 0)
826                /* Feature 2 is has-AltiVec */
827                | ((SDL_HasAltiVec()) ? 2 : 0)
828                /* Feature 4 is dont-use-prefetch */
829                /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
830                | ((GetL3CacheSize() == 0) ? 4 : 0)
831            );
832        }
833    }
834    return features;
835}
836#if __MWERKS__
837#pragma altivec_model off
838#endif
839#else
/* Without the AltiVec blitters the only feature bit is 1 (has-MMX) */
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() != 0))
842#endif
843
/* HI and LO depend on the byte order of the target */
#if SDL_BYTEORDER != SDL_LIL_ENDIAN /* i.e. SDL_BIG_ENDIAN */
#define HI	0
#define LO	1
#else /* SDL_BYTEORDER == SDL_LIL_ENDIAN */
#define HI	1
#define LO	0
#endif
852
853#if SDL_HERMES_BLITTERS
854
855/* Heheheh, we coerce Hermes into using SDL blit information */
856#define X86_ASSEMBLER
857#define HermesConverterInterface	SDL_BlitInfo
858#define HermesClearInterface		void
859#define STACKCALL
860
861#include "../hermes/HeadMMX.h"
862#include "../hermes/HeadX86.h"
863
864#else
865
/*
 * Special optimized blit for RGB 8-8-8 --> RGB 3-3-2:
 * keeps the top 3 bits of red, top 3 bits of green and top 2 bits of blue.
 */
#define RGB888_RGB332(dst, src) { \
	dst = (Uint8)((((src) & 0x000000C0) >> 6)  | \
	              (((src) & 0x0000E000) >> 11) | \
	              (((src) & 0x00E00000) >> 16)); \
}
872static void Blit_RGB888_index8(SDL_BlitInfo *info)
873{
874#ifndef USE_DUFFS_LOOP
875	int c;
876#endif
877	int width, height;
878	Uint32 *src;
879	const Uint8 *map;
880	Uint8 *dst;
881	int srcskip, dstskip;
882
883	/* Set up some basic variables */
884	width = info->d_width;
885	height = info->d_height;
886	src = (Uint32 *)info->s_pixels;
887	srcskip = info->s_skip/4;
888	dst = info->d_pixels;
889	dstskip = info->d_skip;
890	map = info->table;
891
892	if ( map == NULL ) {
893		while ( height-- ) {
894#ifdef USE_DUFFS_LOOP
895			DUFFS_LOOP(
896				RGB888_RGB332(*dst++, *src);
897			, width);
898#else
899			for ( c=width/4; c; --c ) {
900				/* Pack RGB into 8bit pixel */
901				++src;
902				RGB888_RGB332(*dst++, *src);
903				++src;
904				RGB888_RGB332(*dst++, *src);
905				++src;
906				RGB888_RGB332(*dst++, *src);
907				++src;
908			}
909			switch ( width & 3 ) {
910				case 3:
911					RGB888_RGB332(*dst++, *src);
912					++src;
913				case 2:
914					RGB888_RGB332(*dst++, *src);
915					++src;
916				case 1:
917					RGB888_RGB332(*dst++, *src);
918					++src;
919			}
920#endif /* USE_DUFFS_LOOP */
921			src += srcskip;
922			dst += dstskip;
923		}
924	} else {
925		int Pixel;
926
927		while ( height-- ) {
928#ifdef USE_DUFFS_LOOP
929			DUFFS_LOOP(
930				RGB888_RGB332(Pixel, *src);
931				*dst++ = map[Pixel];
932				++src;
933			, width);
934#else
935			for ( c=width/4; c; --c ) {
936				/* Pack RGB into 8bit pixel */
937				RGB888_RGB332(Pixel, *src);
938				*dst++ = map[Pixel];
939				++src;
940				RGB888_RGB332(Pixel, *src);
941				*dst++ = map[Pixel];
942				++src;
943				RGB888_RGB332(Pixel, *src);
944				*dst++ = map[Pixel];
945				++src;
946				RGB888_RGB332(Pixel, *src);
947				*dst++ = map[Pixel];
948				++src;
949			}
950			switch ( width & 3 ) {
951				case 3:
952					RGB888_RGB332(Pixel, *src);
953					*dst++ = map[Pixel];
954					++src;
955				case 2:
956					RGB888_RGB332(Pixel, *src);
957					*dst++ = map[Pixel];
958					++src;
959				case 1:
960					RGB888_RGB332(Pixel, *src);
961					*dst++ = map[Pixel];
962					++src;
963			}
964#endif /* USE_DUFFS_LOOP */
965			src += srcskip;
966			dst += dstskip;
967		}
968	}
969}
970/* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
971#define RGB888_RGB555(dst, src) { \
972	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
973	                            (((*src)&0x0000F800)>>6)| \
974	                            (((*src)&0x000000F8)>>3)); \
975}
976#define RGB888_RGB555_TWO(dst, src) { \
977	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
978	                     (((src[HI])&0x0000F800)>>6)| \
979	                     (((src[HI])&0x000000F8)>>3))<<16)| \
980	                     (((src[LO])&0x00F80000)>>9)| \
981	                     (((src[LO])&0x0000F800)>>6)| \
982	                     (((src[LO])&0x000000F8)>>3); \
983}
984static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
985{
986#ifndef USE_DUFFS_LOOP
987	int c;
988#endif
989	int width, height;
990	Uint32 *src;
991	Uint16 *dst;
992	int srcskip, dstskip;
993
994	/* Set up some basic variables */
995	width = info->d_width;
996	height = info->d_height;
997	src = (Uint32 *)info->s_pixels;
998	srcskip = info->s_skip/4;
999	dst = (Uint16 *)info->d_pixels;
1000	dstskip = info->d_skip/2;
1001
1002#ifdef USE_DUFFS_LOOP
1003	while ( height-- ) {
1004		DUFFS_LOOP(
1005			RGB888_RGB555(dst, src);
1006			++src;
1007			++dst;
1008		, width);
1009		src += srcskip;
1010		dst += dstskip;
1011	}
1012#else
1013	/* Memory align at 4-byte boundary, if necessary */
1014	if ( (long)dst & 0x03 ) {
1015		/* Don't do anything if width is 0 */
1016		if ( width == 0 ) {
1017			return;
1018		}
1019		--width;
1020
1021		while ( height-- ) {
1022			/* Perform copy alignment */
1023			RGB888_RGB555(dst, src);
1024			++src;
1025			++dst;
1026
1027			/* Copy in 4 pixel chunks */
1028			for ( c=width/4; c; --c ) {
1029				RGB888_RGB555_TWO(dst, src);
1030				src += 2;
1031				dst += 2;
1032				RGB888_RGB555_TWO(dst, src);
1033				src += 2;
1034				dst += 2;
1035			}
1036			/* Get any leftovers */
1037			switch (width & 3) {
1038				case 3:
1039					RGB888_RGB555(dst, src);
1040					++src;
1041					++dst;
1042				case 2:
1043					RGB888_RGB555_TWO(dst, src);
1044					src += 2;
1045					dst += 2;
1046					break;
1047				case 1:
1048					RGB888_RGB555(dst, src);
1049					++src;
1050					++dst;
1051					break;
1052			}
1053			src += srcskip;
1054			dst += dstskip;
1055		}
1056	} else {
1057		while ( height-- ) {
1058			/* Copy in 4 pixel chunks */
1059			for ( c=width/4; c; --c ) {
1060				RGB888_RGB555_TWO(dst, src);
1061				src += 2;
1062				dst += 2;
1063				RGB888_RGB555_TWO(dst, src);
1064				src += 2;
1065				dst += 2;
1066			}
1067			/* Get any leftovers */
1068			switch (width & 3) {
1069				case 3:
1070					RGB888_RGB555(dst, src);
1071					++src;
1072					++dst;
1073				case 2:
1074					RGB888_RGB555_TWO(dst, src);
1075					src += 2;
1076					dst += 2;
1077					break;
1078				case 1:
1079					RGB888_RGB555(dst, src);
1080					++src;
1081					++dst;
1082					break;
1083			}
1084			src += srcskip;
1085			dst += dstskip;
1086		}
1087	}
1088#endif /* USE_DUFFS_LOOP */
1089}
1090/* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
1091#define RGB888_RGB565(dst, src) { \
1092	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
1093	                            (((*src)&0x0000FC00)>>5)| \
1094	                            (((*src)&0x000000F8)>>3)); \
1095}
1096#define RGB888_RGB565_TWO(dst, src) { \
1097	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
1098	                     (((src[HI])&0x0000FC00)>>5)| \
1099	                     (((src[HI])&0x000000F8)>>3))<<16)| \
1100	                     (((src[LO])&0x00F80000)>>8)| \
1101	                     (((src[LO])&0x0000FC00)>>5)| \
1102	                     (((src[LO])&0x000000F8)>>3); \
1103}
1104static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
1105{
1106#ifndef USE_DUFFS_LOOP
1107	int c;
1108#endif
1109	int width, height;
1110	Uint32 *src;
1111	Uint16 *dst;
1112	int srcskip, dstskip;
1113
1114	/* Set up some basic variables */
1115	width = info->d_width;
1116	height = info->d_height;
1117	src = (Uint32 *)info->s_pixels;
1118	srcskip = info->s_skip/4;
1119	dst = (Uint16 *)info->d_pixels;
1120	dstskip = info->d_skip/2;
1121
1122#ifdef USE_DUFFS_LOOP
1123	while ( height-- ) {
1124		DUFFS_LOOP(
1125			RGB888_RGB565(dst, src);
1126			++src;
1127			++dst;
1128		, width);
1129		src += srcskip;
1130		dst += dstskip;
1131	}
1132#else
1133	/* Memory align at 4-byte boundary, if necessary */
1134	if ( (long)dst & 0x03 ) {
1135		/* Don't do anything if width is 0 */
1136		if ( width == 0 ) {
1137			return;
1138		}
1139		--width;
1140
1141		while ( height-- ) {
1142			/* Perform copy alignment */
1143			RGB888_RGB565(dst, src);
1144			++src;
1145			++dst;
1146
1147			/* Copy in 4 pixel chunks */
1148			for ( c=width/4; c; --c ) {
1149				RGB888_RGB565_TWO(dst, src);
1150				src += 2;
1151				dst += 2;
1152				RGB888_RGB565_TWO(dst, src);
1153				src += 2;
1154				dst += 2;
1155			}
1156			/* Get any leftovers */
1157			switch (width & 3) {
1158				case 3:
1159					RGB888_RGB565(dst, src);
1160					++src;
1161					++dst;
1162				case 2:
1163					RGB888_RGB565_TWO(dst, src);
1164					src += 2;
1165					dst += 2;
1166					break;
1167				case 1:
1168					RGB888_RGB565(dst, src);
1169					++src;
1170					++dst;
1171					break;
1172			}
1173			src += srcskip;
1174			dst += dstskip;
1175		}
1176	} else {
1177		while ( height-- ) {
1178			/* Copy in 4 pixel chunks */
1179			for ( c=width/4; c; --c ) {
1180				RGB888_RGB565_TWO(dst, src);
1181				src += 2;
1182				dst += 2;
1183				RGB888_RGB565_TWO(dst, src);
1184				src += 2;
1185				dst += 2;
1186			}
1187			/* Get any leftovers */
1188			switch (width & 3) {
1189				case 3:
1190					RGB888_RGB565(dst, src);
1191					++src;
1192					++dst;
1193				case 2:
1194					RGB888_RGB565_TWO(dst, src);
1195					src += 2;
1196					dst += 2;
1197					break;
1198				case 1:
1199					RGB888_RGB565(dst, src);
1200					++src;
1201					++dst;
1202					break;
1203			}
1204			src += srcskip;
1205			dst += dstskip;
1206		}
1207	}
1208#endif /* USE_DUFFS_LOOP */
1209}
1210
1211#endif /* SDL_HERMES_BLITTERS */
1212
1213
1214/* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1215#define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
1216static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
1217{
1218#ifndef USE_DUFFS_LOOP
1219	int c;
1220#endif
1221	int width, height;
1222	Uint8 *src;
1223	Uint32 *dst;
1224	int srcskip, dstskip;
1225
1226	/* Set up some basic variables */
1227	width = info->d_width;
1228	height = info->d_height;
1229	src = (Uint8 *)info->s_pixels;
1230	srcskip = info->s_skip;
1231	dst = (Uint32 *)info->d_pixels;
1232	dstskip = info->d_skip/4;
1233
1234#ifdef USE_DUFFS_LOOP
1235	while ( height-- ) {
1236		DUFFS_LOOP(
1237		{
1238			*dst++ = RGB565_32(dst, src, map);
1239			src += 2;
1240		},
1241		width);
1242		src += srcskip;
1243		dst += dstskip;
1244	}
1245#else
1246	while ( height-- ) {
1247		/* Copy in 4 pixel chunks */
1248		for ( c=width/4; c; --c ) {
1249			*dst++ = RGB565_32(dst, src, map);
1250			src += 2;
1251			*dst++ = RGB565_32(dst, src, map);
1252			src += 2;
1253			*dst++ = RGB565_32(dst, src, map);
1254			src += 2;
1255			*dst++ = RGB565_32(dst, src, map);
1256			src += 2;
1257		}
1258		/* Get any leftovers */
1259		switch (width & 3) {
1260			case 3:
1261				*dst++ = RGB565_32(dst, src, map);
1262				src += 2;
1263			case 2:
1264				*dst++ = RGB565_32(dst, src, map);
1265				src += 2;
1266			case 1:
1267				*dst++ = RGB565_32(dst, src, map);
1268				src += 2;
1269				break;
1270		}
1271		src += srcskip;
1272		dst += dstskip;
1273	}
1274#endif /* USE_DUFFS_LOOP */
1275}
1276
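/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8.
 * The table holds two 32-bit entries per byte value i: entry 2*i is the
 * contribution of a pixel's low byte, entry 2*i+1 that of its high byte
 * (with the 0xff alpha already included). The RGB565_32 macro adds the two. */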
1278static const Uint32 RGB565_ARGB8888_LUT[512] = {
1279		0x00000000, 0xff000000, 0x00000008, 0xff002000,
1280		0x00000010, 0xff004000, 0x00000018, 0xff006100,
1281		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
1282		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
1283		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
1284		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
1285		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
1286		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
1287		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
1288		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
1289		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
1290		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
1291		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
1292		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
1293		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
1294		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
1295		0x00000400, 0xff200000, 0x00000408, 0xff202000,
1296		0x00000410, 0xff204000, 0x00000418, 0xff206100,
1297		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
1298		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
1299		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
1300		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
1301		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
1302		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
1303		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
1304		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
1305		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
1306		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
1307		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
1308		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
1309		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
1310		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
1311		0x00000800, 0xff410000, 0x00000808, 0xff412000,
1312		0x00000810, 0xff414000, 0x00000818, 0xff416100,
1313		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
1314		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
1315		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
1316		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
1317		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
1318		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
1319		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
1320		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
1321		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
1322		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
1323		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
1324		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
1325		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
1326		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
1327		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
1328		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
1329		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
1330		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
1331		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
1332		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
1333		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
1334		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
1335		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
1336		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
1337		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
1338		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
1339		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
1340		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
1341		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
1342		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
1343		0x00001000, 0xff830000, 0x00001008, 0xff832000,
1344		0x00001010, 0xff834000, 0x00001018, 0xff836100,
1345		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
1346		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
1347		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
1348		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
1349		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
1350		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
1351		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
1352		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
1353		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
1354		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
1355		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
1356		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
1357		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
1358		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
1359		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
1360		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
1361		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
1362		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
1363		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
1364		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
1365		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
1366		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
1367		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
1368		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
1369		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
1370		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
1371		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
1372		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
1373		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
1374		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
1375		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
1376		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
1377		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
1378		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
1379		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
1380		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
1381		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
1382		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
1383		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
1384		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
1385		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
1386		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
1387		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
1388		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
1389		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
1390		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
1391		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
1392		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
1393		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
1394		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
1395		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
1396		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
1397		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
1398		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
1399		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
1400		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
1401		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
1402		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
1403		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
1404		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1405		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1406		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1407};
/* Blit 16-bit RGB565 pixels to a 32-bit destination by running the shared
   table-driven blitter Blit_RGB565_32() with RGB565_ARGB8888_LUT. */
static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
1412
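/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8.
 * The table holds two 32-bit entries per byte value i: entry 2*i is the
 * contribution of a pixel's low byte (with the 0xff alpha already included),
 * entry 2*i+1 that of its high byte. The RGB565_32 macro adds the two. */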
1414static const Uint32 RGB565_ABGR8888_LUT[512] = {
1415		0xff000000, 0x00000000, 0xff080000, 0x00002000,
1416		0xff100000, 0x00004000, 0xff180000, 0x00006100,
1417		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
1418		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
1419		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
1420		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
1421		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
1422		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
1423		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
1424		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
1425		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
1426		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
1427		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
1428		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
1429		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
1430		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
1431		0xff000400, 0x00000020, 0xff080400, 0x00002020,
1432		0xff100400, 0x00004020, 0xff180400, 0x00006120,
1433		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
1434		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
1435		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
1436		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
1437		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
1438		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
1439		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
1440		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
1441		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
1442		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
1443		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
1444		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
1445		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
1446		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
1447		0xff000800, 0x00000041, 0xff080800, 0x00002041,
1448		0xff100800, 0x00004041, 0xff180800, 0x00006141,
1449		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
1450		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
1451		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
1452		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
1453		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
1454		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
1455		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
1456		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
1457		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
1458		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
1459		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
1460		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
1461		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
1462		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
1463		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
1464		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
1465		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
1466		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
1467		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
1468		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
1469		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
1470		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
1471		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
1472		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
1473		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
1474		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
1475		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
1476		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
1477		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
1478		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
1479		0xff001000, 0x00000083, 0xff081000, 0x00002083,
1480		0xff101000, 0x00004083, 0xff181000, 0x00006183,
1481		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
1482		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
1483		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
1484		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
1485		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
1486		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
1487		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
1488		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
1489		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
1490		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
1491		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
1492		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
1493		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
1494		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
1495		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
1496		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
1497		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
1498		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
1499		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
1500		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
1501		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
1502		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
1503		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
1504		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
1505		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
1506		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
1507		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
1508		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
1509		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
1510		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
1511		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
1512		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
1513		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
1514		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
1515		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
1516		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
1517		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
1518		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
1519		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
1520		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
1521		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
1522		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
1523		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
1524		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
1525		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
1526		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
1527		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
1528		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
1529		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
1530		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
1531		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
1532		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
1533		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
1534		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
1535		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
1536		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
1537		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
1538		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
1539		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
1540		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
1541		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
1542		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
1543};
/* Blit 16-bit RGB565 pixels to a 32-bit destination by running the shared
   table-driven blitter Blit_RGB565_32() with RGB565_ABGR8888_LUT. */
static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
1548
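/* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8.
 * The table holds two 32-bit entries per byte value i: entry 2*i is the
 * contribution of a pixel's low byte (with the 0xff alpha already included),
 * entry 2*i+1 that of its high byte. The RGB565_32 macro adds the two. */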
1550static const Uint32 RGB565_RGBA8888_LUT[512] = {
1551		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
1552		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
1553		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
1554		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
1555		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
1556		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
1557		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
1558		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
1559		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
1560		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
1561		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
1562		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
1563		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
1564		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
1565		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
1566		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
1567		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
1568		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
1569		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
1570		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
1571		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
1572		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
1573		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
1574		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
1575		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
1576		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
1577		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
1578		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
1579		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
1580		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
1581		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
1582		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
1583		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
1584		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
1585		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
1586		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
1587		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
1588		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
1589		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
1590		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
1591		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
1592		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
1593		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
1594		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
1595		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
1596		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
1597		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
1598		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
1599		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
1600		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
1601		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
1602		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
1603		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
1604		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
1605		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
1606		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
1607		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
1608		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
1609		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
1610		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
1611		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
1612		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
1613		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
1614		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
1615		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
1616		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
1617		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
1618		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
1619		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
1620		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
1621		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
1622		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
1623		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
1624		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
1625		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
1626		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
1627		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
1628		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
1629		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
1630		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
1631		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
1632		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
1633		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
1634		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
1635		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
1636		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
1637		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
1638		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
1639		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
1640		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
1641		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
1642		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
1643		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
1644		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
1645		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
1646		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
1647		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
1648		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
1649		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
1650		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
1651		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
1652		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
1653		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
1654		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
1655		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
1656		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
1657		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
1658		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
1659		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
1660		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
1661		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
1662		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
1663		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
1664		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
1665		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
1666		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
1667		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
1668		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
1669		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
1670		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
1671		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
1672		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
1673		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
1674		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
1675		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
1676		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
1677		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
1678		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
1679};
/* Blit 16-bit RGB565 pixels to a 32-bit destination by running the shared
   table-driven blitter Blit_RGB565_32() with RGB565_RGBA8888_LUT. */
static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
1684
/* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
/*
 * 512 entries, stored as 256 (even, odd) pairs; pair i describes byte value i.
 *  - even entry: value of i taken as the LOW byte of an RGB565 pixel
 *    (blue expanded into bits 31-24, the three low green bits into bits 23-16)
 *  - odd entry: value of i taken as the HIGH byte of an RGB565 pixel
 *    (the three high green bits in bits 23-16, red expanded into bits 15-8,
 *    and a constant 0xff in bits 7-0)
 * NOTE(review): presumably Blit_RGB565_32() combines one even and one odd
 * entry per source pixel -- confirm against its definition, which is not
 * part of this section.
 */
static const Uint32 RGB565_BGRA8888_LUT[512] = {
		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
};
/* Blit RGB 5-6-5 --> BGRA 8-8-8-8: forwards to Blit_RGB565_32() with the
   RGB565_BGRA8888_LUT lookup table */
static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
1820
1821/* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
1822#ifndef RGB888_RGB332
1823#define RGB888_RGB332(dst, src) { \
1824	dst = (((src)&0x00E00000)>>16)| \
1825	      (((src)&0x0000E000)>>11)| \
1826	      (((src)&0x000000C0)>>6); \
1827}
1828#endif
1829static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
1830{
1831#ifndef USE_DUFFS_LOOP
1832	int c;
1833#endif
1834	int Pixel;
1835	int width, height;
1836	Uint32 *src;
1837	const Uint8 *map;
1838	Uint8 *dst;
1839	int srcskip, dstskip;
1840
1841	/* Set up some basic variables */
1842	width = info->d_width;
1843	height = info->d_height;
1844	src = (Uint32 *)info->s_pixels;
1845	srcskip = info->s_skip/4;
1846	dst = info->d_pixels;
1847	dstskip = info->d_skip;
1848	map = info->table;
1849
1850#ifdef USE_DUFFS_LOOP
1851	while ( height-- ) {
1852		DUFFS_LOOP(
1853			RGB888_RGB332(Pixel, *src);
1854			*dst++ = map[Pixel];
1855			++src;
1856		, width);
1857		src += srcskip;
1858		dst += dstskip;
1859	}
1860#else
1861	while ( height-- ) {
1862		for ( c=width/4; c; --c ) {
1863			/* Pack RGB into 8bit pixel */
1864			RGB888_RGB332(Pixel, *src);
1865			*dst++ = map[Pixel];
1866			++src;
1867			RGB888_RGB332(Pixel, *src);
1868			*dst++ = map[Pixel];
1869			++src;
1870			RGB888_RGB332(Pixel, *src);
1871			*dst++ = map[Pixel];
1872			++src;
1873			RGB888_RGB332(Pixel, *src);
1874			*dst++ = map[Pixel];
1875			++src;
1876		}
1877		switch ( width & 3 ) {
1878			case 3:
1879				RGB888_RGB332(Pixel, *src);
1880				*dst++ = map[Pixel];
1881				++src;
1882			case 2:
1883				RGB888_RGB332(Pixel, *src);
1884				*dst++ = map[Pixel];
1885				++src;
1886			case 1:
1887				RGB888_RGB332(Pixel, *src);
1888				*dst++ = map[Pixel];
1889				++src;
1890		}
1891		src += srcskip;
1892		dst += dstskip;
1893	}
1894#endif /* USE_DUFFS_LOOP */
1895}
1896static void BlitNto1(SDL_BlitInfo *info)
1897{
1898#ifndef USE_DUFFS_LOOP
1899	int c;
1900#endif
1901	int width, height;
1902	Uint8 *src;
1903	const Uint8 *map;
1904	Uint8 *dst;
1905	int srcskip, dstskip;
1906	int srcbpp;
1907	Uint32 Pixel;
1908	int  sR, sG, sB;
1909	SDL_PixelFormat *srcfmt;
1910
1911	/* Set up some basic variables */
1912	width = info->d_width;
1913	height = info->d_height;
1914	src = info->s_pixels;
1915	srcskip = info->s_skip;
1916	dst = info->d_pixels;
1917	dstskip = info->d_skip;
1918	map = info->table;
1919	srcfmt = info->src;
1920	srcbpp = srcfmt->BytesPerPixel;
1921
1922	if ( map == NULL ) {
1923		while ( height-- ) {
1924#ifdef USE_DUFFS_LOOP
1925			DUFFS_LOOP(
1926				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1927								sR, sG, sB);
1928				if ( 1 ) {
1929				  	/* Pack RGB into 8bit pixel */
1930				  	*dst = ((sR>>5)<<(3+2))|
1931					        ((sG>>5)<<(2)) |
1932					        ((sB>>6)<<(0)) ;
1933				}
1934				dst++;
1935				src += srcbpp;
1936			, width);
1937#else
1938			for ( c=width; c; --c ) {
1939				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1940								sR, sG, sB);
1941				if ( 1 ) {
1942				  	/* Pack RGB into 8bit pixel */
1943				  	*dst = ((sR>>5)<<(3+2))|
1944					        ((sG>>5)<<(2)) |
1945					        ((sB>>6)<<(0)) ;
1946				}
1947				dst++;
1948				src += srcbpp;
1949			}
1950#endif
1951			src += srcskip;
1952			dst += dstskip;
1953		}
1954	} else {
1955		while ( height-- ) {
1956#ifdef USE_DUFFS_LOOP
1957			DUFFS_LOOP(
1958				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1959								sR, sG, sB);
1960				if ( 1 ) {
1961				  	/* Pack RGB into 8bit pixel */
1962				  	*dst = map[((sR>>5)<<(3+2))|
1963						   ((sG>>5)<<(2))  |
1964						   ((sB>>6)<<(0))  ];
1965				}
1966				dst++;
1967				src += srcbpp;
1968			, width);
1969#else
1970			for ( c=width; c; --c ) {
1971				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
1972								sR, sG, sB);
1973				if ( 1 ) {
1974				  	/* Pack RGB into 8bit pixel */
1975				  	*dst = map[((sR>>5)<<(3+2))|
1976						   ((sG>>5)<<(2))  |
1977						   ((sB>>6)<<(0))  ];
1978				}
1979				dst++;
1980				src += srcbpp;
1981			}
1982#endif /* USE_DUFFS_LOOP */
1983			src += srcskip;
1984			dst += dstskip;
1985		}
1986	}
1987}
1988
1989/* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
1990static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
1991{
1992	int width = info->d_width;
1993	int height = info->d_height;
1994	Uint32 *src = (Uint32 *)info->s_pixels;
1995	int srcskip = info->s_skip;
1996	Uint32 *dst = (Uint32 *)info->d_pixels;
1997	int dstskip = info->d_skip;
1998	SDL_PixelFormat *srcfmt = info->src;
1999	SDL_PixelFormat *dstfmt = info->dst;
2000
2001	if (dstfmt->Amask) {
2002		/* RGB->RGBA, SET_ALPHA */
2003		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
2004
2005		while ( height-- ) {
2006			DUFFS_LOOP(
2007			{
2008				*dst = *src | mask;
2009				++dst;
2010				++src;
2011			},
2012			width);
2013			src = (Uint32*)((Uint8*)src + srcskip);
2014			dst = (Uint32*)((Uint8*)dst + dstskip);
2015		}
2016	} else {
2017		/* RGBA->RGB, NO_ALPHA */
2018		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
2019
2020		while ( height-- ) {
2021			DUFFS_LOOP(
2022			{
2023				*dst = *src & mask;
2024				++dst;
2025				++src;
2026			},
2027			width);
2028			src = (Uint32*)((Uint8*)src + srcskip);
2029			dst = (Uint32*)((Uint8*)dst + dstskip);
2030		}
2031	}
2032}
2033
2034static void BlitNtoN(SDL_BlitInfo *info)
2035{
2036	int width = info->d_width;
2037	int height = info->d_height;
2038	Uint8 *src = info->s_pixels;
2039	int srcskip = info->s_skip;
2040	Uint8 *dst = info->d_pixels;
2041	int dstskip = info->d_skip;
2042	SDL_PixelFormat *srcfmt = info->src;
2043	int srcbpp = srcfmt->BytesPerPixel;
2044	SDL_PixelFormat *dstfmt = info->dst;
2045	int dstbpp = dstfmt->BytesPerPixel;
2046	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
2047
2048	while ( height-- ) {
2049		DUFFS_LOOP(
2050		{
2051		        Uint32 Pixel;
2052			unsigned sR;
2053			unsigned sG;
2054			unsigned sB;
2055			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2056			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2057			dst += dstbpp;
2058			src += srcbpp;
2059		},
2060		width);
2061		src += srcskip;
2062		dst += dstskip;
2063	}
2064}
2065
2066static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
2067{
2068	int width = info->d_width;
2069	int height = info->d_height;
2070	Uint8 *src = info->s_pixels;
2071	int srcskip = info->s_skip;
2072	Uint8 *dst = info->d_pixels;
2073	int dstskip = info->d_skip;
2074	SDL_PixelFormat *srcfmt = info->src;
2075	int srcbpp = srcfmt->BytesPerPixel;
2076	SDL_PixelFormat *dstfmt = info->dst;
2077	int dstbpp = dstfmt->BytesPerPixel;
2078	int c;
2079
2080	/* FIXME: should map alpha to [0..255] correctly! */
2081	while ( height-- ) {
2082		for ( c=width; c; --c ) {
2083		        Uint32 Pixel;
2084			unsigned sR, sG, sB, sA;
2085			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
2086				      sR, sG, sB, sA);
2087			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2088				      sR, sG, sB, sA);
2089			dst += dstbpp;
2090			src += srcbpp;
2091		}
2092		src += srcskip;
2093		dst += dstskip;
2094	}
2095}
2096
2097static void BlitNto1Key(SDL_BlitInfo *info)
2098{
2099	int width = info->d_width;
2100	int height = info->d_height;
2101	Uint8 *src = info->s_pixels;
2102	int srcskip = info->s_skip;
2103	Uint8 *dst = info->d_pixels;
2104	int dstskip = info->d_skip;
2105	SDL_PixelFormat *srcfmt = info->src;
2106	const Uint8 *palmap = info->table;
2107	Uint32 ckey = srcfmt->colorkey;
2108	Uint32 rgbmask = ~srcfmt->Amask;
2109	int srcbpp;
2110	Uint32 Pixel;
2111	unsigned sR, sG, sB;
2112
2113	/* Set up some basic variables */
2114	srcbpp = srcfmt->BytesPerPixel;
2115	ckey &= rgbmask;
2116
2117	if ( palmap == NULL ) {
2118		while ( height-- ) {
2119			DUFFS_LOOP(
2120			{
2121				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2122								sR, sG, sB);
2123				if ( (Pixel & rgbmask) != ckey ) {
2124				  	/* Pack RGB into 8bit pixel */
2125				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
2126						           ((sG>>5)<<(2)) |
2127						           ((sB>>6)<<(0)));
2128				}
2129				dst++;
2130				src += srcbpp;
2131			},
2132			width);
2133			src += srcskip;
2134			dst += dstskip;
2135		}
2136	} else {
2137		while ( height-- ) {
2138			DUFFS_LOOP(
2139			{
2140				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2141								sR, sG, sB);
2142				if ( (Pixel & rgbmask) != ckey ) {
2143				  	/* Pack RGB into 8bit pixel */
2144				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
2145							             ((sG>>5)<<(2))  |
2146							             ((sB>>6)<<(0))  ];
2147				}
2148				dst++;
2149				src += srcbpp;
2150			},
2151			width);
2152			src += srcskip;
2153			dst += dstskip;
2154		}
2155	}
2156}
2157
2158static void Blit2to2Key(SDL_BlitInfo *info)
2159{
2160	int width = info->d_width;
2161	int height = info->d_height;
2162	Uint16 *srcp = (Uint16 *)info->s_pixels;
2163	int srcskip = info->s_skip;
2164	Uint16 *dstp = (Uint16 *)info->d_pixels;
2165	int dstskip = info->d_skip;
2166	Uint32 ckey = info->src->colorkey;
2167	Uint32 rgbmask = ~info->src->Amask;
2168
2169	/* Set up some basic variables */
2170        srcskip /= 2;
2171        dstskip /= 2;
2172	ckey &= rgbmask;
2173
2174	while ( height-- ) {
2175		DUFFS_LOOP(
2176		{
2177			if ( (*srcp & rgbmask) != ckey ) {
2178				*dstp = *srcp;
2179			}
2180			dstp++;
2181			srcp++;
2182		},
2183		width);
2184		srcp += srcskip;
2185		dstp += dstskip;
2186	}
2187}
2188
2189static void BlitNtoNKey(SDL_BlitInfo *info)
2190{
2191	int width = info->d_width;
2192	int height = info->d_height;
2193	Uint8 *src = info->s_pixels;
2194	int srcskip = info->s_skip;
2195	Uint8 *dst = info->d_pixels;
2196	int dstskip = info->d_skip;
2197	Uint32 ckey = info->src->colorkey;
2198	SDL_PixelFormat *srcfmt = info->src;
2199	SDL_PixelFormat *dstfmt = info->dst;
2200	int srcbpp = srcfmt->BytesPerPixel;
2201	int dstbpp = dstfmt->BytesPerPixel;
2202	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
2203	Uint32 rgbmask = ~srcfmt->Amask;
2204
2205	/* Set up some basic variables */
2206	ckey &= rgbmask;
2207
2208	while ( height-- ) {
2209		DUFFS_LOOP(
2210		{
2211		        Uint32 Pixel;
2212			unsigned sR;
2213			unsigned sG;
2214			unsigned sB;
2215			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
2216			if ( (Pixel & rgbmask) != ckey ) {
2217			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
2218				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2219					      sR, sG, sB, alpha);
2220			}
2221			dst += dstbpp;
2222			src += srcbpp;
2223		},
2224		width);
2225		src += srcskip;
2226		dst += dstskip;
2227	}
2228}
2229
2230static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
2231{
2232	int width = info->d_width;
2233	int height = info->d_height;
2234	Uint8 *src = info->s_pixels;
2235	int srcskip = info->s_skip;
2236	Uint8 *dst = info->d_pixels;
2237	int dstskip = info->d_skip;
2238	Uint32 ckey = info->src->colorkey;
2239	SDL_PixelFormat *srcfmt = info->src;
2240	SDL_PixelFormat *dstfmt = info->dst;
2241	Uint32 rgbmask = ~srcfmt->Amask;
2242
2243	Uint8 srcbpp;
2244	Uint8 dstbpp;
2245	Uint32 Pixel;
2246	unsigned sR, sG, sB, sA;
2247
2248	/* Set up some basic variables */
2249	srcbpp = srcfmt->BytesPerPixel;
2250	dstbpp = dstfmt->BytesPerPixel;
2251	ckey &= rgbmask;
2252
2253	/* FIXME: should map alpha to [0..255] correctly! */
2254	while ( height-- ) {
2255		DUFFS_LOOP(
2256		{
2257			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
2258				      sR, sG, sB, sA);
2259			if ( (Pixel & rgbmask) != ckey ) {
2260				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
2261						sR, sG, sB, sA);
2262			}
2263			dst += dstbpp;
2264			src += srcbpp;
2265		},
2266		width);
2267		src += srcskip;
2268		dst += dstskip;
2269	}
2270}
2271
/* Normal N to N optimized blitters */
/*
 * One candidate blitter.  SDL_CalculateBlitN() scans an array of these in
 * order and stops at the first entry whose masks, destination depth, alpha
 * capability and feature bits all match, or at the terminating entry.
 */
struct blit_table {
	/* Source R, G, B masks; a zero mask matches anything (see MASKOK) */
	Uint32 srcR, srcG, srcB;
	/* Destination bytes per pixel; an entry with 0 here ends the table */
	int dstbpp;
	/* Destination R, G, B masks; a zero mask matches anything (see MASKOK) */
	Uint32 dstR, dstG, dstB;
	/* Feature bits that must all be present in GetBlitFeatures() */
	Uint32 blit_features;
	/* Copied into the surface's sw_data->aux_data when this entry is chosen */
	void *aux_data;
	/* Blit function returned when this entry is chosen */
	SDL_loblit blitfunc;
	/* OR of the alpha handling modes this blitter can provide */
	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
};
/* Blitter table for 1-byte-per-pixel sources (slot 0 of normal_blit).
   It holds only the terminator, whose blit function is NULL. */
static const struct blit_table normal_blit_1[] = {
	/* Default for 8-bit RGB source, an invalid combination */
	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
};
/*
 * Blitter table for 2-byte-per-pixel sources (slot 1 of normal_blit).
 * Entries are tried in order, so the conditionally compiled Hermes or
 * AltiVec blitters take precedence over the portable RGB565 lookup-table
 * blitters that follow.  The last entry (dstbpp == 0) ends the scan and
 * supplies BlitNtoN as the fallback.
 */
static const struct blit_table normal_blit_2[] = {
#if SDL_HERMES_BLITTERS
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
#elif SDL_ALTIVEC_BLITTERS
    /* has-altivec */
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
      0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
    { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },

    /* Default for 16-bit RGB source, used if no other blitter matches */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Blitter table for 3-byte-per-pixel sources (slot 2 of normal_blit).
   It holds only the terminator, so BlitNtoN is always the table result. */
static const struct blit_table normal_blit_3[] = {
	/* Default for 24-bit RGB source, never optimized */
    { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/*
 * Blitter table for 4-byte-per-pixel sources (slot 3 of normal_blit).
 * Entries are tried in order.  Where the same conversion is listed twice,
 * the entry that requires a feature bit comes first and the entry with
 * blit_features == 0 follows as the version without that requirement.
 * NOTE(review): feature bit 1 appears only on the ConvertMMX entries, so it
 * presumably means "has MMX" -- confirm against GetBlitFeatures().
 * The last entry (dstbpp == 0) ends the scan and supplies BlitNtoN as the
 * fallback.
 */
static const struct blit_table normal_blit_4[] = {
#if SDL_HERMES_BLITTERS
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
      0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
      0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      1, ConvertMMXpII32_24RGB888, ConvertMMX, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
      0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
      0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
      0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
      0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
#else
#if SDL_ALTIVEC_BLITTERS
    /* has-altivec | dont-use-prefetch */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
      2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    /* has-altivec */
    { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
      2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
#endif
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
    { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
      0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
#endif
	/* Default for 32-bit RGB source, used if no other blitter matches */
	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
};
/* Blitter tables indexed by (source BytesPerPixel - 1) */
static const struct blit_table *normal_blit[] = {
	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};
2370
/* True when surface mask x is acceptable for table mask y: either the two
   are identical, or the table mask is zero and therefore matches anything */
#define MASKOK(x, y) ((x) == (y) || 0 == (y))
2373
2374SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
2375{
2376	struct private_swaccel *sdata;
2377	SDL_PixelFormat *srcfmt;
2378	SDL_PixelFormat *dstfmt;
2379	const struct blit_table *table;
2380	int which;
2381	SDL_loblit blitfun;
2382
2383	/* Set up data for choosing the blit */
2384	sdata = surface->map->sw_data;
2385	srcfmt = surface->format;
2386	dstfmt = surface->map->dst->format;
2387
2388	if ( blit_index & 2 ) {
2389	        /* alpha or alpha+colorkey */
2390	        return SDL_CalculateAlphaBlit(surface, blit_index);
2391	}
2392
2393	/* We don't support destinations less than 8-bits */
2394	if ( dstfmt->BitsPerPixel < 8 ) {
2395		return(NULL);
2396	}
2397
2398	if(blit_index == 1) {
2399	    /* colorkey blit: Here we don't have too many options, mostly
2400	       because RLE is the preferred fast way to deal with this.
2401	       If a particular case turns out to be useful we'll add it. */
2402
2403	    if(srcfmt->BytesPerPixel == 2
2404	       && surface->map->identity)
2405		return Blit2to2Key;
2406	    else if(dstfmt->BytesPerPixel == 1)
2407		return BlitNto1Key;
2408	    else {
2409#if SDL_ALTIVEC_BLITTERS
2410        if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
2411            return Blit32to32KeyAltivec;
2412        } else
2413#endif
2414
2415		if(srcfmt->Amask && dstfmt->Amask)
2416		    return BlitNtoNKeyCopyAlpha;
2417		else
2418		    return BlitNtoNKey;
2419	    }
2420	}
2421
2422	blitfun = NULL;
2423	if ( dstfmt->BitsPerPixel == 8 ) {
2424		/* We assume 8-bit destinations are palettized */
2425		if ( (srcfmt->BytesPerPixel == 4) &&
2426		     (srcfmt->Rmask == 0x00FF0000) &&
2427		     (srcfmt->Gmask == 0x0000FF00) &&
2428		     (srcfmt->Bmask == 0x000000FF) ) {
2429			if ( surface->map->table ) {
2430				blitfun = Blit_RGB888_index8_map;
2431			} else {
2432#if SDL_HERMES_BLITTERS
2433				sdata->aux_data = ConvertX86p32_8RGB332;
2434				blitfun = ConvertX86;
2435#else
2436				blitfun = Blit_RGB888_index8;
2437#endif
2438			}
2439		} else {
2440			blitfun = BlitNto1;
2441		}
2442	} else {
2443		/* Now the meat, choose the blitter we want */
2444		int a_need = NO_ALPHA;
2445		if(dstfmt->Amask)
2446		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
2447		table = normal_blit[srcfmt->BytesPerPixel-1];
2448		for ( which=0; table[which].dstbpp; ++which ) {
2449			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
2450			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
2451			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
2452			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
2453			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
2454			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
2455			    dstfmt->BytesPerPixel == table[which].dstbpp &&
2456			    (a_need & table[which].alpha) == a_need &&
2457			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
2458				break;
2459		}
2460		sdata->aux_data = table[which].aux_data;
2461		blitfun = table[which].blitfunc;
2462
2463		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
2464			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
2465			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
2466			     srcfmt->Rmask == dstfmt->Rmask &&
2467			     srcfmt->Gmask == dstfmt->Gmask &&
2468			     srcfmt->Bmask == dstfmt->Bmask ) {
2469				blitfun = Blit4to4MaskAlpha;
2470			} else if ( a_need == COPY_ALPHA ) {
2471			    blitfun = BlitNtoNCopyAlpha;
2472			}
2473		}
2474	}
2475
2476#ifdef DEBUG_ASM
2477#if SDL_HERMES_BLITTERS
2478	if ( blitfun == ConvertMMX )
2479		fprintf(stderr, "Using mmx blit\n");
2480	else
2481	if ( blitfun == ConvertX86 )
2482		fprintf(stderr, "Using asm blit\n");
2483	else
2484#endif
2485	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
2486		fprintf(stderr, "Using C blit\n");
2487	else
2488		fprintf(stderr, "Using optimized C blit\n");
2489#endif /* DEBUG_ASM */
2490
2491	return(blitfun);
2492}
2493