1/*
2    SDL - Simple DirectMedia Layer
3    Copyright (C) 1997-2012 Sam Lantinga
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19    Sam Lantinga
20    slouken@libsdl.org
21*/
22#include "SDL_config.h"
23
24/* This is the software implementation of the YUV video overlay support */
25
26/* This code was derived from code carrying the following copyright notices:
27
28 * Copyright (c) 1995 The Regents of the University of California.
29 * All rights reserved.
30 *
31 * Permission to use, copy, modify, and distribute this software and its
32 * documentation for any purpose, without fee, and without written agreement is
33 * hereby granted, provided that the above copyright notice and the following
34 * two paragraphs appear in all copies of this software.
35 *
36 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
37 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
38 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
39 * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 *
41 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
42 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
43 * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
44 * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
45 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
46
47 * Copyright (c) 1995 Erik Corry
48 * All rights reserved.
49 *
50 * Permission to use, copy, modify, and distribute this software and its
51 * documentation for any purpose, without fee, and without written agreement is
52 * hereby granted, provided that the above copyright notice and the following
53 * two paragraphs appear in all copies of this software.
54 *
55 * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
56 * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
57 * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
58 * OF THE POSSIBILITY OF SUCH DAMAGE.
59 *
60 * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
61 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
62 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
63 * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
64 * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
65
66 * Portions of this software Copyright (c) 1995 Brown University.
67 * All rights reserved.
68 *
69 * Permission to use, copy, modify, and distribute this software and its
70 * documentation for any purpose, without fee, and without written agreement
71 * is hereby granted, provided that the above copyright notice and the
72 * following two paragraphs appear in all copies of this software.
73 *
74 * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
75 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
76 * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
77 * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
78 *
79 * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
80 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
81 * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
82 * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
83 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
84 */
85
86#include "SDL_video.h"
87#include "SDL_cpuinfo.h"
88#include "SDL_stretch_c.h"
89#include "SDL_yuvfuncs.h"
90#include "SDL_yuv_sw_c.h"
91
92/* The functions used to manipulate software video overlays */
93static struct private_yuvhwfuncs sw_yuvfuncs = {
94	SDL_LockYUV_SW,
95	SDL_UnlockYUV_SW,
96	SDL_DisplayYUV_SW,
97	SDL_FreeYUV_SW
98};
99
100/* RGB conversion lookup tables */
101struct private_yuvhwdata {
102	SDL_Surface *stretch;
103	SDL_Surface *display;
104	Uint8 *pixels;
105	int *colortab;
106	Uint32 *rgb_2_pix;
107	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
108                          unsigned char *lum, unsigned char *cr,
109                          unsigned char *cb, unsigned char *out,
110                          int rows, int cols, int mod );
111	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
112	                  unsigned char *lum, unsigned char *cr,
113                          unsigned char *cb, unsigned char *out,
114                          int rows, int cols, int mod );
115
116	/* These are just so we don't have to allocate them separately */
117	Uint16 pitches[3];
118	Uint8 *planes[3];
119};
120
121
122/* The colorspace conversion functions */
123
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
/*
 * Converters defined outside this file (MMX variants, judging by their
 * names).  They take the same arguments as the C converters in this file.
 */
extern void Color565DitherYV12MMX1X(
                int *colortab, Uint32 *rgb_2_pix,
                unsigned char *lum, unsigned char *cr,
                unsigned char *cb, unsigned char *out,
                int rows, int cols, int mod );

extern void ColorRGBDitherYV12MMX1X(
                int *colortab, Uint32 *rgb_2_pix,
                unsigned char *lum, unsigned char *cr,
                unsigned char *cb, unsigned char *out,
                int rows, int cols, int mod );
#endif
134
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 16-bit pixels without scaling.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination, written as unsigned short pixels
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        destination pixels to skip after the last pixel of a row
 */
static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned short *dst_a = (unsigned short *) out;   /* upper row of a pair */
    unsigned short *dst_b = dst_a + cols + mod;       /* lower row of a pair */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* After a pass each pointer sits at the end of its row: skip the
       padding, the row handled by the other pointer and its padding. */
    const int skip = mod + cols + mod;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper row, two pixels */
            Y = lum[0];
            dst_a[0] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                        rgb_2_pix[ Y + g_off ] |
                                        rgb_2_pix[ Y + b_off ]);
            Y = lum[1];
            dst_a[1] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                        rgb_2_pix[ Y + g_off ] |
                                        rgb_2_pix[ Y + b_off ]);

            /* Lower row, same chroma */
            Y = lum_b[0];
            dst_b[0] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                        rgb_2_pix[ Y + g_off ] |
                                        rgb_2_pix[ Y + b_off ]);
            Y = lum_b[1];
            dst_b[1] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                        rgb_2_pix[ Y + g_off ] |
                                        rgb_2_pix[ Y + b_off ]);

            lum   += 2;
            lum_b += 2;
            dst_a += 2;
            dst_b += 2;
        }

        /* Everything now points at the row the other pointer just used;
           move on to the next pair of rows. */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
204
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 24-bit pixels without scaling.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel value
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination; each pixel is stored as three bytes, least
 *              significant byte of the pixel value first
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        destination pixels (not bytes) to skip after each row
 */
static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned char *dst_a = out;                        /* upper row of a pair */
    unsigned char *dst_b = dst_a + cols*3 + mod*3;     /* lower row of a pair */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* Bytes from the end of one row to the start of the row two below */
    const int skip = (mod + cols + mod) * 3;
    unsigned int pix;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper row, two pixels */
            Y = lum[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[0] = (pix      ) & 0xFF;
            dst_a[1] = (pix >>  8) & 0xFF;
            dst_a[2] = (pix >> 16) & 0xFF;

            Y = lum[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[3] = (pix      ) & 0xFF;
            dst_a[4] = (pix >>  8) & 0xFF;
            dst_a[5] = (pix >> 16) & 0xFF;

            /* Lower row, same chroma */
            Y = lum_b[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[0] = (pix      ) & 0xFF;
            dst_b[1] = (pix >>  8) & 0xFF;
            dst_b[2] = (pix >> 16) & 0xFF;

            Y = lum_b[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[3] = (pix      ) & 0xFF;
            dst_b[4] = (pix >>  8) & 0xFF;
            dst_b[5] = (pix >> 16) & 0xFF;

            lum   += 2;
            lum_b += 2;
            dst_a += 2*3;
            dst_b += 2*3;
        }

        /* Move every pointer on to the next pair of rows */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
288
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 32-bit pixels without scaling.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination, written as unsigned int pixels
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        destination pixels to skip after the last pixel of a row
 */
static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *dst_a = (unsigned int *) out;   /* upper row of a pair */
    unsigned int *dst_b = dst_a + cols + mod;     /* lower row of a pair */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* Pixels from the end of one row to the start of the row two below */
    const int skip = mod + cols + mod;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper row, two pixels */
            Y = lum[0];
            dst_a[0] = (rgb_2_pix[ Y + r_off ] |
                        rgb_2_pix[ Y + g_off ] |
                        rgb_2_pix[ Y + b_off ]);
            Y = lum[1];
            dst_a[1] = (rgb_2_pix[ Y + r_off ] |
                        rgb_2_pix[ Y + g_off ] |
                        rgb_2_pix[ Y + b_off ]);

            /* Lower row, same chroma */
            Y = lum_b[0];
            dst_b[0] = (rgb_2_pix[ Y + r_off ] |
                        rgb_2_pix[ Y + g_off ] |
                        rgb_2_pix[ Y + b_off ]);
            Y = lum_b[1];
            dst_b[1] = (rgb_2_pix[ Y + r_off ] |
                        rgb_2_pix[ Y + g_off ] |
                        rgb_2_pix[ Y + b_off ]);

            lum   += 2;
            lum_b += 2;
            dst_a += 2;
            dst_b += 2;
        }

        /* Move every pointer on to the next pair of rows */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
358
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 16-bit pixels doubled in both directions.
 *
 * This relies on a trick: the lookup tables are expected to hold the
 * 16-bit pixel in the lower 16 bits and a copy of it in the upper 16 bits.
 * Storing a whole unsigned int therefore writes two identical 16-bit
 * pixels, which gives the horizontal doubling almost for free.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a (doubled) pixel
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination, written as unsigned ints
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        16-bit destination pixels to skip after each output row
 *              (used as mod/2 unsigned ints)
 */
static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row, counted in unsigned ints */
    const int stride = cols + (mod/2);
    unsigned int *dst_a = (unsigned int *) out;   /* output rows 0 and 1 */
    unsigned int *dst_b = dst_a + 2*stride;       /* output rows 2 and 3 */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* From the end of an output row to the start of the row four below */
    const int skip = (stride * 3) + (mod/2);
    unsigned int pix;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper source row: each value goes to two output rows */
            Y = lum[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[0] = pix;
            dst_a[stride] = pix;

            Y = lum[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[1] = pix;
            dst_a[stride + 1] = pix;

            /* Lower source row, same chroma */
            Y = lum_b[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[0] = pix;
            dst_b[stride] = pix;

            Y = lum_b[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[1] = pix;
            dst_b[stride + 1] = pix;

            lum   += 2;
            lum_b += 2;
            dst_a += 2;
            dst_b += 2;
        }

        /* Move every pointer on to the next pair of source rows */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
436
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 24-bit pixels doubled in both directions.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel value
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination; each pixel is stored as three bytes, least
 *              significant byte of the pixel value first
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        destination pixels (not bytes) to skip after each output row
 */
static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row in bytes */
    const int stride = (cols*2 + mod) * 3;
    unsigned char *dst_a = out;                  /* output rows 0 and 1 */
    unsigned char *dst_b = dst_a + 2*stride;     /* output rows 2 and 3 */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* From the end of an output row to the start of the row four below */
    const int skip = stride*3 + mod*3;
    unsigned int pix;
    unsigned char b0, b1, b2;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper source row: every pixel becomes a 2x2 output square */
            Y = *lum++;
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            b0 = (pix      ) & 0xFF;
            b1 = (pix >>  8) & 0xFF;
            b2 = (pix >> 16) & 0xFF;
            dst_a[0] = dst_a[3] = dst_a[stride+0] = dst_a[stride+3] = b0;
            dst_a[1] = dst_a[4] = dst_a[stride+1] = dst_a[stride+4] = b1;
            dst_a[2] = dst_a[5] = dst_a[stride+2] = dst_a[stride+5] = b2;
            dst_a += 2*3;

            Y = *lum++;
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            b0 = (pix      ) & 0xFF;
            b1 = (pix >>  8) & 0xFF;
            b2 = (pix >> 16) & 0xFF;
            dst_a[0] = dst_a[3] = dst_a[stride+0] = dst_a[stride+3] = b0;
            dst_a[1] = dst_a[4] = dst_a[stride+1] = dst_a[stride+4] = b1;
            dst_a[2] = dst_a[5] = dst_a[stride+2] = dst_a[stride+5] = b2;
            dst_a += 2*3;

            /* Lower source row, same chroma */
            Y = *lum_b++;
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            b0 = (pix      ) & 0xFF;
            b1 = (pix >>  8) & 0xFF;
            b2 = (pix >> 16) & 0xFF;
            dst_b[0] = dst_b[3] = dst_b[stride+0] = dst_b[stride+3] = b0;
            dst_b[1] = dst_b[4] = dst_b[stride+1] = dst_b[stride+4] = b1;
            dst_b[2] = dst_b[5] = dst_b[stride+2] = dst_b[stride+5] = b2;
            dst_b += 2*3;

            Y = *lum_b++;
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            b0 = (pix      ) & 0xFF;
            b1 = (pix >>  8) & 0xFF;
            b2 = (pix >> 16) & 0xFF;
            dst_b[0] = dst_b[3] = dst_b[stride+0] = dst_b[stride+3] = b0;
            dst_b[1] = dst_b[4] = dst_b[stride+1] = dst_b[stride+4] = b1;
            dst_b[2] = dst_b[5] = dst_b[stride+2] = dst_b[stride+5] = b2;
            dst_b += 2*3;
        }

        /* Move every pointer on to the next pair of source rows */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
534
/*
 * Convert planar YUV data, where one cr/cb sample is shared by a 2x2 group
 * of luminance samples, into 32-bit pixels doubled in both directions.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   lum        luminance plane, cols bytes per row
 *   cr, cb     chroma planes, consumed one sample per 2x2 group
 *   out        destination, written as unsigned int pixels
 *   rows, cols source size; processed two rows and two columns at a time
 *   mod        destination pixels to skip after each output row
 */
static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    /* Length of one output row in pixels */
    const int stride = cols*2 + mod;
    unsigned int *dst_a = (unsigned int *) out;   /* output rows 0 and 1 */
    unsigned int *dst_b = dst_a + 2*stride;       /* output rows 2 and 3 */
    unsigned char *lum_b = lum + cols;
    const int half_cols = cols / 2;
    /* From the end of an output row to the start of the row four below */
    const int skip = (stride * 3) + mod;
    unsigned int pix;
    int y, x;

    for ( y = rows / 2; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            ++cr;
            ++cb;

            /* Upper source row: every pixel becomes a 2x2 output square */
            Y = lum[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[0] = dst_a[1] = dst_a[stride] = dst_a[stride+1] = pix;

            Y = lum[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_a[2] = dst_a[3] = dst_a[stride+2] = dst_a[stride+3] = pix;

            /* Lower source row, same chroma */
            Y = lum_b[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[0] = dst_b[1] = dst_b[stride] = dst_b[stride+1] = pix;

            Y = lum_b[1];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst_b[2] = dst_b[3] = dst_b[stride+2] = dst_b[stride+3] = pix;

            lum   += 2;
            lum_b += 2;
            dst_a += 4;
            dst_b += 4;
        }

        /* Move every pointer on to the next pair of source rows */
        lum   += cols;
        lum_b += cols;
        dst_a += skip;
        dst_b += skip;
    }
}
611
/*
 * Convert interleaved YUV data into 16-bit pixels without scaling.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair; the caller selects the byte order by where lum, cr and cb
 * initially point.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   out        destination, written as unsigned short pixels
 *   rows, cols source size; columns are processed two at a time
 *   mod        destination pixels to skip after the last pixel of a row
 */
static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned short *dst = (unsigned short *) out;
    const int half_cols = cols / 2;
    int y, x;

    for ( y = rows; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            cr += 4;
            cb += 4;

            Y = lum[0];
            dst[0] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                      rgb_2_pix[ Y + g_off ] |
                                      rgb_2_pix[ Y + b_off ]);
            Y = lum[2];
            dst[1] = (unsigned short)(rgb_2_pix[ Y + r_off ] |
                                      rgb_2_pix[ Y + g_off ] |
                                      rgb_2_pix[ Y + b_off ]);

            lum += 4;
            dst += 2;
        }

        /* Step over the padding at the end of the output row */
        dst += mod;
    }
}
655
/*
 * Convert interleaved YUV data into 24-bit pixels without scaling.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair; the caller selects the byte order by where lum, cr and cb
 * initially point.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel value
 *   out        destination; each pixel is stored as three bytes, least
 *              significant byte of the pixel value first
 *   rows, cols source size; columns are processed two at a time
 *   mod        destination pixels (not bytes) to skip after each row
 */
static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned char *dst = (unsigned char *) out;
    const int half_cols = cols / 2;
    const int pad_bytes = mod * 3;
    unsigned int pix;
    int y, x;

    for ( y = rows; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            cr += 4;
            cb += 4;

            Y = lum[0];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst[0] = (pix      ) & 0xFF;
            dst[1] = (pix >>  8) & 0xFF;
            dst[2] = (pix >> 16) & 0xFF;

            Y = lum[2];
            pix = (rgb_2_pix[ Y + r_off ] |
                   rgb_2_pix[ Y + g_off ] |
                   rgb_2_pix[ Y + b_off ]);
            dst[3] = (pix      ) & 0xFF;
            dst[4] = (pix >>  8) & 0xFF;
            dst[5] = (pix >> 16) & 0xFF;

            lum += 4;
            dst += 2*3;
        }

        /* Step over the padding at the end of the output row */
        dst += pad_bytes;
    }
}
705
/*
 * Convert interleaved YUV data into 32-bit pixels without scaling.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair; the caller selects the byte order by where lum, cr and cb
 * initially point.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   out        destination, written as unsigned int pixels
 *   rows, cols source size; columns are processed two at a time
 *   mod        destination pixels to skip after the last pixel of a row
 */
static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *dst = (unsigned int *) out;
    const int half_cols = cols / 2;
    int y, x;

    for ( y = rows; y != 0; --y ) {
        for ( x = half_cols; x != 0; --x ) {
            const int r_off = 0*768+256 + colortab[ *cr + 0*256 ];
            const int g_off = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int b_off = 2*768+256 + colortab[ *cb + 3*256 ];
            int Y;

            cr += 4;
            cb += 4;

            Y = lum[0];
            dst[0] = (rgb_2_pix[ Y + r_off ] |
                      rgb_2_pix[ Y + g_off ] |
                      rgb_2_pix[ Y + b_off ]);
            Y = lum[2];
            dst[1] = (rgb_2_pix[ Y + r_off ] |
                      rgb_2_pix[ Y + g_off ] |
                      rgb_2_pix[ Y + b_off ]);

            lum += 4;
            dst += 2;
        }

        /* Step over the padding at the end of the output row */
        dst += mod;
    }
}
748
/*
 * Convert interleaved YUV data into 16-bit pixels doubled in both
 * directions.
 *
 * This relies on a trick: the lookup tables are expected to hold the
 * 16-bit pixel in the lower 16 bits and a copy of it in the upper 16 bits.
 * Storing a whole unsigned int therefore writes two identical 16-bit
 * pixels, which gives the horizontal doubling almost for free.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a (doubled) pixel
 *   out        destination, written as unsigned ints
 *   rows, cols source size; columns are processed two at a time
 *   mod        16-bit destination pixels to skip after each output row
 *              (used as mod/2 unsigned ints)
 */
static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *row = (unsigned int *) out;
    /* Length of one output row, counted in unsigned ints */
    const int next_row = cols + (mod/2);
    const int cols_2 = cols / 2;
    unsigned int pix;
    int x, y;

    for ( y = rows; y > 0; --y ) {
        for ( x = cols_2; x > 0; --x ) {
            const int cr_r  = 0*768+256 + colortab[ *cr + 0*256 ];
            const int crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int cb_b  = 2*768+256 + colortab[ *cb + 3*256 ];
            int L;

            cr += 4;
            cb += 4;

            /* Each value goes to this output row and the one below it */
            L = *lum; lum += 2;
            pix = (rgb_2_pix[ L + cr_r ] |
                   rgb_2_pix[ L + crb_g ] |
                   rgb_2_pix[ L + cb_b ]);
            row[0] = row[next_row] = pix;
            row++;

            L = *lum; lum += 2;
            pix = (rgb_2_pix[ L + cr_r ] |
                   rgb_2_pix[ L + crb_g ] |
                   rgb_2_pix[ L + cb_b ]);
            row[0] = row[next_row] = pix;
            row++;
        }

        /*
         * row has advanced cols ints along the upper output row.  Skip
         * that row's padding (mod/2) and the whole duplicated row below
         * it (next_row) to land on the start of the next output pair.
         * Adding only next_row would fall mod/2 ints short per source row.
         */
        row += next_row + (mod/2);
    }
}
797
/*
 * Convert interleaved YUV data into 24-bit pixels doubled in both
 * directions.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel value
 *   out        destination; each pixel is stored as three bytes, least
 *              significant byte of the pixel value first
 *   rows, cols source size; columns are processed two at a time
 *   mod        destination pixels (not bytes) to skip after each output row
 */
static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned char *row = out;
    /* Length of one output row in bytes */
    const int next_row = (cols*2 + mod) * 3;
    const int cols_2 = cols / 2;
    unsigned int value;
    unsigned char b0, b1, b2;
    int x, y;

    for ( y = rows; y > 0; --y ) {
        for ( x = cols_2; x > 0; --x ) {
            const int cr_r  = 0*768+256 + colortab[ *cr + 0*256 ];
            const int crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int cb_b  = 2*768+256 + colortab[ *cb + 3*256 ];
            int L;

            cr += 4;
            cb += 4;

            /* Every source pixel becomes a 2x2 square of output pixels */
            L = *lum; lum += 2;
            value = (rgb_2_pix[ L + cr_r ] |
                     rgb_2_pix[ L + crb_g ] |
                     rgb_2_pix[ L + cb_b ]);
            b0 = (value      ) & 0xFF;
            b1 = (value >>  8) & 0xFF;
            b2 = (value >> 16) & 0xFF;
            row[0] = row[3] = row[next_row+0] = row[next_row+3] = b0;
            row[1] = row[4] = row[next_row+1] = row[next_row+4] = b1;
            row[2] = row[5] = row[next_row+2] = row[next_row+5] = b2;
            row += 2*3;

            L = *lum; lum += 2;
            value = (rgb_2_pix[ L + cr_r ] |
                     rgb_2_pix[ L + crb_g ] |
                     rgb_2_pix[ L + cb_b ]);
            b0 = (value      ) & 0xFF;
            b1 = (value >>  8) & 0xFF;
            b2 = (value >> 16) & 0xFF;
            row[0] = row[3] = row[next_row+0] = row[next_row+3] = b0;
            row[1] = row[4] = row[next_row+1] = row[next_row+4] = b1;
            row[2] = row[5] = row[next_row+2] = row[next_row+5] = b2;
            row += 2*3;
        }

        /*
         * row has advanced cols*2*3 bytes along the upper output row.
         * Skip that row's padding (mod*3 bytes) and the whole duplicated
         * row below it (next_row) to land on the next output pair.
         * Adding only next_row would fall mod*3 bytes short per source row.
         */
        row += next_row + mod*3;
    }
}
853
/*
 * Convert interleaved YUV data into 32-bit pixels doubled in both
 * directions.
 *
 * The source is read with a stride of 2 bytes per luminance sample and
 * 4 bytes per cr/cb sample, so two horizontally adjacent pixels share one
 * chroma pair.
 *
 *   colortab   four consecutive 256-entry tables: tables 0 and 1 are
 *              indexed by the cr sample, tables 2 and 3 by the cb sample
 *   rgb_2_pix  three consecutive 768-entry tables; one entry from each is
 *              OR-ed together to build a pixel
 *   out        destination, written as unsigned int pixels
 *   rows, cols source size; columns are processed two at a time
 *   mod        destination pixels to skip after each output row
 */
static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
                                    unsigned char *lum, unsigned char *cr,
                                    unsigned char *cb, unsigned char *out,
                                    int rows, int cols, int mod )
{
    unsigned int *row = (unsigned int *) out;
    /* Length of one output row in pixels */
    const int next_row = cols*2 + mod;
    const int cols_2 = cols / 2;
    unsigned int pix;
    int x, y;

    for ( y = rows; y > 0; --y ) {
        for ( x = cols_2; x > 0; --x ) {
            const int cr_r  = 0*768+256 + colortab[ *cr + 0*256 ];
            const int crb_g = 1*768+256 + colortab[ *cr + 1*256 ]
                                        + colortab[ *cb + 2*256 ];
            const int cb_b  = 2*768+256 + colortab[ *cb + 3*256 ];
            int L;

            cr += 4;
            cb += 4;

            /* Every source pixel becomes a 2x2 square of output pixels */
            L = *lum; lum += 2;
            pix = (rgb_2_pix[ L + cr_r ] |
                   rgb_2_pix[ L + crb_g ] |
                   rgb_2_pix[ L + cb_b ]);
            row[0] = row[1] = row[next_row] = row[next_row+1] = pix;
            row += 2;

            L = *lum; lum += 2;
            pix = (rgb_2_pix[ L + cr_r ] |
                   rgb_2_pix[ L + crb_g ] |
                   rgb_2_pix[ L + cb_b ]);
            row[0] = row[1] = row[next_row] = row[next_row+1] = pix;
            row += 2;
        }

        /*
         * row has advanced 2*cols pixels along the upper output row.
         * Skip that row's padding (mod) and the whole duplicated row
         * below it (next_row) to land on the next output pair.  Adding
         * only next_row would fall mod pixels short per source row.
         */
        row += next_row + mod;
    }
}
901
/*
 * Count the bits that are set to 1 in a Uint32.
 * Examines one bit per step, so keep it out of hot paths.
 */
static int number_of_bits_set( Uint32 a )
{
    int count = 0;

    for ( ; a != 0; a >>= 1 ) {
        count += (int)(a & 1);
    }
    return count;
}
912
/*
 * Count the consecutive 0 bits at the least significant end of a Uint32.
 * A value of 0 has no set bit at all and reports the full width of the
 * type.  Examines one bit per step, so keep it out of hot paths.
 */
static int free_bits_at_bottom( Uint32 a )
{
    int zeros;

    if ( a == 0 ) {
        /* assume char is 8 bits */
        return sizeof(Uint32) * 8;
    }
    for ( zeros = 0; (a & 1) == 0; a >>= 1 ) {
        ++zeros;
    }
    return zeros;
}
924
925
926SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
927{
928	SDL_Overlay *overlay;
929	struct private_yuvhwdata *swdata;
930	int *Cr_r_tab;
931	int *Cr_g_tab;
932	int *Cb_g_tab;
933	int *Cb_b_tab;
934	Uint32 *r_2_pix_alloc;
935	Uint32 *g_2_pix_alloc;
936	Uint32 *b_2_pix_alloc;
937	int i;
938	int CR, CB;
939	Uint32 Rmask, Gmask, Bmask;
940
941	/* Only RGB packed pixel conversion supported */
942	if ( (display->format->BytesPerPixel != 2) &&
943	     (display->format->BytesPerPixel != 3) &&
944	     (display->format->BytesPerPixel != 4) ) {
945		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
946		return(NULL);
947	}
948
949	/* Verify that we support the format */
950	switch (format) {
951	    case SDL_YV12_OVERLAY:
952	    case SDL_IYUV_OVERLAY:
953	    case SDL_YUY2_OVERLAY:
954	    case SDL_UYVY_OVERLAY:
955	    case SDL_YVYU_OVERLAY:
956		break;
957	    default:
958		SDL_SetError("Unsupported YUV format");
959		return(NULL);
960	}
961
962	/* Create the overlay structure */
963	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
964	if ( overlay == NULL ) {
965		SDL_OutOfMemory();
966		return(NULL);
967	}
968	SDL_memset(overlay, 0, (sizeof *overlay));
969
970	/* Fill in the basic members */
971	overlay->format = format;
972	overlay->w = width;
973	overlay->h = height;
974
975	/* Set up the YUV surface function structure */
976	overlay->hwfuncs = &sw_yuvfuncs;
977
978	/* Create the pixel data and lookup tables */
979	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
980	overlay->hwdata = swdata;
981	if ( swdata == NULL ) {
982		SDL_OutOfMemory();
983		SDL_FreeYUVOverlay(overlay);
984		return(NULL);
985	}
986	swdata->stretch = NULL;
987	swdata->display = display;
988	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
989	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
990	Cr_r_tab = &swdata->colortab[0*256];
991	Cr_g_tab = &swdata->colortab[1*256];
992	Cb_g_tab = &swdata->colortab[2*256];
993	Cb_b_tab = &swdata->colortab[3*256];
994	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
995	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
996	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
997	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
998	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
999		SDL_OutOfMemory();
1000		SDL_FreeYUVOverlay(overlay);
1001		return(NULL);
1002	}
1003
1004	/* Generate the tables for the display surface */
1005	for (i=0; i<256; i++) {
1006		/* Gamma correction (luminescence table) and chroma correction
1007		   would be done here.  See the Berkeley mpeg_play sources.
1008		*/
1009		CB = CR = (i-128);
1010		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
1011		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
1012		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB);
1013		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
1014	}
1015
1016	/*
1017	 * Set up entries 0-255 in rgb-to-pixel value tables.
1018	 */
1019	Rmask = display->format->Rmask;
1020	Gmask = display->format->Gmask;
1021	Bmask = display->format->Bmask;
1022	for ( i=0; i<256; ++i ) {
1023		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
1024		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
1025		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
1026		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
1027		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
1028		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
1029	}
1030
1031	/*
1032	 * If we have 16-bit output depth, then we double the value
1033	 * in the top word. This means that we can write out both
1034	 * pixels in the pixel doubling mode with one op. It is
1035	 * harmless in the normal case as storing a 32-bit value
1036	 * through a short pointer will lose the top bits anyway.
1037	 */
1038	if( display->format->BytesPerPixel == 2 ) {
1039		for ( i=0; i<256; ++i ) {
1040			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
1041			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
1042			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
1043		}
1044	}
1045
1046	/*
1047	 * Spread out the values we have to the rest of the array so that
1048	 * we do not need to check for overflow.
1049	 */
1050	for ( i=0; i<256; ++i ) {
1051		r_2_pix_alloc[i] = r_2_pix_alloc[256];
1052		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
1053		g_2_pix_alloc[i] = g_2_pix_alloc[256];
1054		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
1055		b_2_pix_alloc[i] = b_2_pix_alloc[256];
1056		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
1057	}
1058
1059	/* You have chosen wisely... */
1060	switch (format) {
1061	    case SDL_YV12_OVERLAY:
1062	    case SDL_IYUV_OVERLAY:
1063		if ( display->format->BytesPerPixel == 2 ) {
1064#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
1065			/* inline assembly functions */
1066			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
1067			                     (Gmask == 0x07E0) &&
1068				             (Bmask == 0x001F) &&
1069			                     (width & 15) == 0) {
1070/*printf("Using MMX 16-bit 565 dither\n");*/
1071				swdata->Display1X = Color565DitherYV12MMX1X;
1072			} else {
1073/*printf("Using C 16-bit dither\n");*/
1074				swdata->Display1X = Color16DitherYV12Mod1X;
1075			}
1076#else
1077			swdata->Display1X = Color16DitherYV12Mod1X;
1078#endif
1079			swdata->Display2X = Color16DitherYV12Mod2X;
1080		}
1081		if ( display->format->BytesPerPixel == 3 ) {
1082			swdata->Display1X = Color24DitherYV12Mod1X;
1083			swdata->Display2X = Color24DitherYV12Mod2X;
1084		}
1085		if ( display->format->BytesPerPixel == 4 ) {
1086#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
1087			/* inline assembly functions */
1088			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
1089			                     (Gmask == 0x0000FF00) &&
1090				             (Bmask == 0x000000FF) &&
1091			                     (width & 15) == 0) {
1092/*printf("Using MMX 32-bit dither\n");*/
1093				swdata->Display1X = ColorRGBDitherYV12MMX1X;
1094			} else {
1095/*printf("Using C 32-bit dither\n");*/
1096				swdata->Display1X = Color32DitherYV12Mod1X;
1097			}
1098#else
1099			swdata->Display1X = Color32DitherYV12Mod1X;
1100#endif
1101			swdata->Display2X = Color32DitherYV12Mod2X;
1102		}
1103		break;
1104	    case SDL_YUY2_OVERLAY:
1105	    case SDL_UYVY_OVERLAY:
1106	    case SDL_YVYU_OVERLAY:
1107		if ( display->format->BytesPerPixel == 2 ) {
1108			swdata->Display1X = Color16DitherYUY2Mod1X;
1109			swdata->Display2X = Color16DitherYUY2Mod2X;
1110		}
1111		if ( display->format->BytesPerPixel == 3 ) {
1112			swdata->Display1X = Color24DitherYUY2Mod1X;
1113			swdata->Display2X = Color24DitherYUY2Mod2X;
1114		}
1115		if ( display->format->BytesPerPixel == 4 ) {
1116			swdata->Display1X = Color32DitherYUY2Mod1X;
1117			swdata->Display2X = Color32DitherYUY2Mod2X;
1118		}
1119		break;
1120	    default:
1121		/* We should never get here (caught above) */
1122		break;
1123	}
1124
1125	/* Find the pitch and offset values for the overlay */
1126	overlay->pitches = swdata->pitches;
1127	overlay->pixels = swdata->planes;
1128	switch (format) {
1129	    case SDL_YV12_OVERLAY:
1130	    case SDL_IYUV_OVERLAY:
1131		overlay->pitches[0] = overlay->w;
1132		overlay->pitches[1] = overlay->pitches[0] / 2;
1133		overlay->pitches[2] = overlay->pitches[0] / 2;
1134	        overlay->pixels[0] = swdata->pixels;
1135	        overlay->pixels[1] = overlay->pixels[0] +
1136		                     overlay->pitches[0] * overlay->h;
1137	        overlay->pixels[2] = overlay->pixels[1] +
1138		                     overlay->pitches[1] * overlay->h / 2;
1139		overlay->planes = 3;
1140		break;
1141	    case SDL_YUY2_OVERLAY:
1142	    case SDL_UYVY_OVERLAY:
1143	    case SDL_YVYU_OVERLAY:
1144		overlay->pitches[0] = overlay->w*2;
1145	        overlay->pixels[0] = swdata->pixels;
1146		overlay->planes = 1;
1147		break;
1148	    default:
1149		/* We should never get here (caught above) */
1150		break;
1151	}
1152
1153	/* We're all done.. */
1154	return(overlay);
1155}
1156
1157int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
1158{
1159	return(0);
1160}
1161
1162void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
1163{
1164	return;
1165}
1166
1167int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
1168{
1169	struct private_yuvhwdata *swdata;
1170	int stretch;
1171	int scale_2x;
1172	SDL_Surface *display;
1173	Uint8 *lum, *Cr, *Cb;
1174	Uint8 *dstp;
1175	int mod;
1176
1177	swdata = overlay->hwdata;
1178	stretch = 0;
1179	scale_2x = 0;
1180	if ( src->x || src->y || src->w < overlay->w || src->h < overlay->h ) {
1181		/* The source rectangle has been clipped.
1182		   Using a scratch surface is easier than adding clipped
1183		   source support to all the blitters, plus that would
1184		   slow them down in the general unclipped case.
1185		*/
1186		stretch = 1;
1187	} else if ( (src->w != dst->w) || (src->h != dst->h) ) {
1188		if ( (dst->w == 2*src->w) &&
1189		     (dst->h == 2*src->h) ) {
1190			scale_2x = 1;
1191		} else {
1192			stretch = 1;
1193		}
1194	}
1195	if ( stretch ) {
1196		if ( ! swdata->stretch ) {
1197			display = swdata->display;
1198			swdata->stretch = SDL_CreateRGBSurface(
1199				SDL_SWSURFACE,
1200				overlay->w, overlay->h,
1201				display->format->BitsPerPixel,
1202				display->format->Rmask,
1203				display->format->Gmask,
1204				display->format->Bmask, 0);
1205			if ( ! swdata->stretch ) {
1206				return(-1);
1207			}
1208		}
1209		display = swdata->stretch;
1210	} else {
1211		display = swdata->display;
1212	}
1213	switch (overlay->format) {
1214	    case SDL_YV12_OVERLAY:
1215		lum = overlay->pixels[0];
1216		Cr =  overlay->pixels[1];
1217		Cb =  overlay->pixels[2];
1218		break;
1219	    case SDL_IYUV_OVERLAY:
1220		lum = overlay->pixels[0];
1221		Cr =  overlay->pixels[2];
1222		Cb =  overlay->pixels[1];
1223		break;
1224	    case SDL_YUY2_OVERLAY:
1225		lum = overlay->pixels[0];
1226		Cr = lum + 3;
1227		Cb = lum + 1;
1228		break;
1229	    case SDL_UYVY_OVERLAY:
1230		lum = overlay->pixels[0]+1;
1231		Cr = lum + 1;
1232		Cb = lum - 1;
1233		break;
1234	    case SDL_YVYU_OVERLAY:
1235		lum = overlay->pixels[0];
1236		Cr = lum + 1;
1237		Cb = lum + 3;
1238		break;
1239	    default:
1240		SDL_SetError("Unsupported YUV format in blit");
1241		return(-1);
1242	}
1243	if ( SDL_MUSTLOCK(display) ) {
1244        	if ( SDL_LockSurface(display) < 0 ) {
1245			return(-1);
1246		}
1247	}
1248	if ( stretch ) {
1249		dstp = (Uint8 *)swdata->stretch->pixels;
1250	} else {
1251		dstp = (Uint8 *)display->pixels
1252			+ dst->x * display->format->BytesPerPixel
1253			+ dst->y * display->pitch;
1254	}
1255	mod = (display->pitch / display->format->BytesPerPixel);
1256
1257	if ( scale_2x ) {
1258		mod -= (overlay->w * 2);
1259		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
1260		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
1261	} else {
1262		mod -= overlay->w;
1263		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
1264		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
1265	}
1266	if ( SDL_MUSTLOCK(display) ) {
1267		SDL_UnlockSurface(display);
1268	}
1269	if ( stretch ) {
1270		display = swdata->display;
1271		SDL_SoftStretch(swdata->stretch, src, display, dst);
1272	}
1273	SDL_UpdateRects(display, 1, dst);
1274
1275	return(0);
1276}
1277
1278void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
1279{
1280	struct private_yuvhwdata *swdata;
1281
1282	swdata = overlay->hwdata;
1283	if ( swdata ) {
1284		if ( swdata->stretch ) {
1285			SDL_FreeSurface(swdata->stretch);
1286		}
1287		if ( swdata->pixels ) {
1288			SDL_free(swdata->pixels);
1289		}
1290		if ( swdata->colortab ) {
1291			SDL_free(swdata->colortab);
1292		}
1293		if ( swdata->rgb_2_pix ) {
1294			SDL_free(swdata->rgb_2_pix);
1295		}
1296		SDL_free(swdata);
1297		overlay->hwdata = NULL;
1298	}
1299}
1300