1/*
2    SDL - Simple DirectMedia Layer
3    Copyright (C) 1997-2012 Sam Lantinga
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19    Sam Lantinga
20    slouken@libsdl.org
21*/
22#include "SDL_config.h"
23
/* This is a stretch blit implementation based on ideas given to me by
25   Tomasz Cejner - thanks! :)
26
27   April 27, 2000 - Sam Lantinga
28*/
29
30#include "SDL_video.h"
31#include "SDL_blit.h"
32
33/* This isn't ready for general consumption yet - it should be folded
34   into the general blitting mechanism.
35*/
36
37#if ((defined(_MFC_VER) && defined(_M_IX86)/* && !defined(_WIN32_WCE) still needed? */) || \
38     defined(__WATCOMC__) || \
39     (defined(__GNUC__) && defined(__i386__))) && SDL_ASSEMBLY_ROUTINES
40/* There's a bug with gcc 4.4.1 and -O2 where srcp doesn't get the correct
41 * value after the first scanline.  FIXME? */
42/*#define USE_ASM_STRETCH*/
43#endif
44
45#ifdef USE_ASM_STRETCH
46
47#ifdef HAVE_MPROTECT
48#include <sys/types.h>
49#include <sys/mman.h>
50#endif
51#ifdef __GNUC__
52#define PAGE_ALIGNED __attribute__((__aligned__(4096)))
53#else
54#define PAGE_ALIGNED
55#endif
56
57#if defined(_M_IX86) || defined(i386)
58#define PREFIX16	0x66
59#define STORE_BYTE	0xAA
60#define STORE_WORD	0xAB
61#define LOAD_BYTE	0xAC
62#define LOAD_WORD	0xAD
63#define RETURN		0xC3
64#else
65#error Need assembly opcodes for this architecture
66#endif
67
68static unsigned char copy_row[4096] PAGE_ALIGNED;
69
70static int generate_rowbytes(int src_w, int dst_w, int bpp)
71{
72	static struct {
73		int bpp;
74		int src_w;
75		int dst_w;
76		int status;
77	} last;
78
79	int i;
80	int pos, inc;
81	unsigned char *eip, *fence;
82	unsigned char load, store;
83
84	/* See if we need to regenerate the copy buffer */
85	if ( (src_w == last.src_w) &&
86	     (dst_w == last.dst_w) && (bpp == last.bpp) ) {
87		return(last.status);
88	}
89	last.bpp = bpp;
90	last.src_w = src_w;
91	last.dst_w = dst_w;
92	last.status = -1;
93
94	switch (bpp) {
95	    case 1:
96		load = LOAD_BYTE;
97		store = STORE_BYTE;
98		break;
99	    case 2:
100	    case 4:
101		load = LOAD_WORD;
102		store = STORE_WORD;
103		break;
104	    default:
105		SDL_SetError("ASM stretch of %d bytes isn't supported\n", bpp);
106		return(-1);
107	}
108#ifdef HAVE_MPROTECT
109	/* Make the code writeable */
110	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_WRITE) < 0 ) {
111		SDL_SetError("Couldn't make copy buffer writeable");
112		return(-1);
113	}
114#endif
115	pos = 0x10000;
116	inc = (src_w << 16) / dst_w;
117	eip = copy_row;
118	fence = copy_row+sizeof(copy_row)-2;
119	for ( i=0; i<dst_w && eip < end; ++i ) {
120		while ( pos >= 0x10000L ) {
121			if ( eip == fence ) {
122				return -1;
123			}
124			if ( bpp == 2 ) {
125				*eip++ = PREFIX16;
126			}
127			*eip++ = load;
128			pos -= 0x10000L;
129		}
130		if ( eip == fence ) {
131			return -1;
132		}
133		if ( bpp == 2 ) {
134			*eip++ = PREFIX16;
135		}
136		*eip++ = store;
137		pos += inc;
138	}
139	*eip++ = RETURN;
140
141#ifdef HAVE_MPROTECT
142	/* Make the code executable but not writeable */
143	if ( mprotect(copy_row, sizeof(copy_row), PROT_READ|PROT_EXEC) < 0 ) {
144		SDL_SetError("Couldn't make copy buffer executable");
145		return(-1);
146	}
147#endif
148	last.status = 0;
149	return(0);
150}
151
152#endif /* USE_ASM_STRETCH */
153
154#define DEFINE_COPY_ROW(name, type)			\
155void name(type *src, int src_w, type *dst, int dst_w)	\
156{							\
157	int i;						\
158	int pos, inc;					\
159	type pixel = 0;					\
160							\
161	pos = 0x10000;					\
162	inc = (src_w << 16) / dst_w;			\
163	for ( i=dst_w; i>0; --i ) {			\
164		while ( pos >= 0x10000L ) {		\
165			pixel = *src++;			\
166			pos -= 0x10000L;		\
167		}					\
168		*dst++ = pixel;				\
169		pos += inc;				\
170	}						\
171}
172DEFINE_COPY_ROW(copy_row1, Uint8)
173DEFINE_COPY_ROW(copy_row2, Uint16)
174DEFINE_COPY_ROW(copy_row4, Uint32)
175
176/* The ASM code doesn't handle 24-bpp stretch blits */
177void copy_row3(Uint8 *src, int src_w, Uint8 *dst, int dst_w)
178{
179	int i;
180	int pos, inc;
181	Uint8 pixel[3] = { 0, 0, 0 };
182
183	pos = 0x10000;
184	inc = (src_w << 16) / dst_w;
185	for ( i=dst_w; i>0; --i ) {
186		while ( pos >= 0x10000L ) {
187			pixel[0] = *src++;
188			pixel[1] = *src++;
189			pixel[2] = *src++;
190			pos -= 0x10000L;
191		}
192		*dst++ = pixel[0];
193		*dst++ = pixel[1];
194		*dst++ = pixel[2];
195		pos += inc;
196	}
197}
198
199/* Perform a stretch blit between two surfaces of the same format.
200   NOTE:  This function is not safe to call from multiple threads!
201*/
202int SDL_SoftStretch(SDL_Surface *src, SDL_Rect *srcrect,
203                    SDL_Surface *dst, SDL_Rect *dstrect)
204{
205	int src_locked;
206	int dst_locked;
207	int pos, inc;
208	int dst_maxrow;
209	int src_row, dst_row;
210	Uint8 *srcp = NULL;
211	Uint8 *dstp;
212	SDL_Rect full_src;
213	SDL_Rect full_dst;
214#ifdef USE_ASM_STRETCH
215	SDL_bool use_asm = SDL_TRUE;
216#ifdef __GNUC__
217	int u1, u2;
218#endif
219#endif /* USE_ASM_STRETCH */
220	const int bpp = dst->format->BytesPerPixel;
221
222	if ( src->format->BitsPerPixel != dst->format->BitsPerPixel ) {
223		SDL_SetError("Only works with same format surfaces");
224		return(-1);
225	}
226
227	/* Verify the blit rectangles */
228	if ( srcrect ) {
229		if ( (srcrect->x < 0) || (srcrect->y < 0) ||
230		     ((srcrect->x+srcrect->w) > src->w) ||
231		     ((srcrect->y+srcrect->h) > src->h) ) {
232			SDL_SetError("Invalid source blit rectangle");
233			return(-1);
234		}
235	} else {
236		full_src.x = 0;
237		full_src.y = 0;
238		full_src.w = src->w;
239		full_src.h = src->h;
240		srcrect = &full_src;
241	}
242	if ( dstrect ) {
243		if ( (dstrect->x < 0) || (dstrect->y < 0) ||
244		     ((dstrect->x+dstrect->w) > dst->w) ||
245		     ((dstrect->y+dstrect->h) > dst->h) ) {
246			SDL_SetError("Invalid destination blit rectangle");
247			return(-1);
248		}
249	} else {
250		full_dst.x = 0;
251		full_dst.y = 0;
252		full_dst.w = dst->w;
253		full_dst.h = dst->h;
254		dstrect = &full_dst;
255	}
256
257	/* Lock the destination if it's in hardware */
258	dst_locked = 0;
259	if ( SDL_MUSTLOCK(dst) ) {
260		if ( SDL_LockSurface(dst) < 0 ) {
261			SDL_SetError("Unable to lock destination surface");
262			return(-1);
263		}
264		dst_locked = 1;
265	}
266	/* Lock the source if it's in hardware */
267	src_locked = 0;
268	if ( SDL_MUSTLOCK(src) ) {
269		if ( SDL_LockSurface(src) < 0 ) {
270			if ( dst_locked ) {
271				SDL_UnlockSurface(dst);
272			}
273			SDL_SetError("Unable to lock source surface");
274			return(-1);
275		}
276		src_locked = 1;
277	}
278
279	/* Set up the data... */
280	pos = 0x10000;
281	inc = (srcrect->h << 16) / dstrect->h;
282	src_row = srcrect->y;
283	dst_row = dstrect->y;
284
285#ifdef USE_ASM_STRETCH
286	/* Write the opcodes for this stretch */
287	if ( (bpp == 3) ||
288	     (generate_rowbytes(srcrect->w, dstrect->w, bpp) < 0) ) {
289		use_asm = SDL_FALSE;
290	}
291#endif
292
293	/* Perform the stretch blit */
294	for ( dst_maxrow = dst_row+dstrect->h; dst_row<dst_maxrow; ++dst_row ) {
295		dstp = (Uint8 *)dst->pixels + (dst_row*dst->pitch)
296		                            + (dstrect->x*bpp);
297		while ( pos >= 0x10000L ) {
298			srcp = (Uint8 *)src->pixels + (src_row*src->pitch)
299			                            + (srcrect->x*bpp);
300			++src_row;
301			pos -= 0x10000L;
302		}
303#ifdef USE_ASM_STRETCH
304		if (use_asm) {
305#ifdef __GNUC__
306			__asm__ __volatile__ (
307			"call *%4"
308			: "=&D" (u1), "=&S" (u2)
309			: "0" (dstp), "1" (srcp), "r" (copy_row)
310			: "memory" );
311#elif defined(_MSC_VER) || defined(__WATCOMC__)
312		{ void *code = copy_row;
313			__asm {
314				push edi
315				push esi
316
317				mov edi, dstp
318				mov esi, srcp
319				call dword ptr code
320
321				pop esi
322				pop edi
323			}
324		}
325#else
326#error Need inline assembly for this compiler
327#endif
328		} else
329#endif
330		switch (bpp) {
331		    case 1:
332			copy_row1(srcp, srcrect->w, dstp, dstrect->w);
333			break;
334		    case 2:
335			copy_row2((Uint16 *)srcp, srcrect->w,
336			          (Uint16 *)dstp, dstrect->w);
337			break;
338		    case 3:
339			copy_row3(srcp, srcrect->w, dstp, dstrect->w);
340			break;
341		    case 4:
342			copy_row4((Uint32 *)srcp, srcrect->w,
343			          (Uint32 *)dstp, dstrect->w);
344			break;
345		}
346		pos += inc;
347	}
348
349	/* We need to unlock the surfaces if they're locked */
350	if ( dst_locked ) {
351		SDL_UnlockSurface(dst);
352	}
353	if ( src_locked ) {
354		SDL_UnlockSurface(src);
355	}
356	return(0);
357}
358
359