1/*
2    SDL - Simple DirectMedia Layer
3    Copyright (C) 1997-2012 Sam Lantinga
4
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14
15    You should have received a copy of the GNU Lesser General Public
16    License along with this library; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
19    Sam Lantinga
20    slouken@libsdl.org
21*/
22#include "SDL_config.h"
23
24/* This is the Playstation 2 implementation of YUV video overlays */
25
26#include <fcntl.h>
27#include <unistd.h>
28#include <sys/ioctl.h>
29#include <sys/mman.h>
30#include <asm/page.h>		/* For definition of PAGE_SIZE */
31
32#include "SDL_video.h"
33#include "SDL_gsyuv_c.h"
34#include "../SDL_yuvfuncs.h"
35
/* The maximum number of 16x16 pixel blocks converted at once */
37#define MAX_MACROBLOCKS	1024	/* 2^10 macroblocks at once */
38
39/* The functions used to manipulate video overlays */
40static struct private_yuvhwfuncs gs_yuvfuncs = {
41	GS_LockYUVOverlay,
42	GS_UnlockYUVOverlay,
43	GS_DisplayYUVOverlay,
44	GS_FreeYUVOverlay
45};
46
47struct private_yuvhwdata {
48	int ipu_fd;
49	Uint8 *pixels;
50	int macroblocks;
51	int dma_len;
52	caddr_t dma_mem;
53	caddr_t ipu_imem;
54	caddr_t ipu_omem;
55	caddr_t dma_tags;
56	unsigned long long *stretch_x1y1;
57	unsigned long long *stretch_x2y2;
58	struct ps2_plist plist;
59
60	/* These are just so we don't have to allocate them separately */
61	Uint16 pitches[3];
62	Uint8 *planes[3];
63};
64
/*
 * Return the smallest shift such that (1 << shift) >= value, i.e. the
 * base-2 logarithm of value rounded up.  Values <= 1 (including zero and
 * negative numbers) yield 0.
 *
 * The comparison is done in unsigned arithmetic: with a signed 1 the loop
 * would evaluate 1 << 31 (and beyond) for any value above 2^30, which is
 * undefined behaviour and may never terminate.
 */
static int power_of_2(int value)
{
	unsigned int target;
	int shift;

	if ( value <= 0 ) {
		return(0);
	}
	target = (unsigned int)value;

	/* target <= INT_MAX, so the loop always stops before the shift
	   count reaches the width of unsigned int. */
	for ( shift = 0; (1U << shift) < target; ++shift ) {
		/* Keep looking */ ;
	}
	return(shift);
}
74
75SDL_Overlay *GS_CreateYUVOverlay(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
76{
77	SDL_Overlay *overlay;
78	struct private_yuvhwdata *hwdata;
79	int map_offset;
80	unsigned long long *tags;
81	caddr_t base;
82	int bpp;
83	int fbp, fbw, psm;
84	int x, y, w, h;
85	int pnum;
86	struct ps2_packet *packet;
87	struct ps2_packet tex_packet;
88
89	/* We can only decode blocks of 16x16 pixels */
90	if ( (width & 15) || (height & 15) ) {
91		SDL_SetError("Overlay width/height must be multiples of 16");
92		return(NULL);
93	}
94	/* Make sure the image isn't too large for a single DMA transfer */
95	if ( ((width/16) * (height/16)) > MAX_MACROBLOCKS ) {
96		SDL_SetError("Overlay too large (maximum size: %d pixels)",
97		             MAX_MACROBLOCKS * 16 * 16);
98		return(NULL);
99	}
100
101	/* Double-check the requested format.  For simplicity, we'll only
102	   support planar YUV formats.
103	 */
104	switch (format) {
105	    case SDL_YV12_OVERLAY:
106	    case SDL_IYUV_OVERLAY:
107		/* Supported planar YUV format */
108		break;
109	    default:
110		SDL_SetError("Unsupported YUV format");
111		return(NULL);
112	}
113
114	/* Create the overlay structure */
115	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
116	if ( overlay == NULL ) {
117		SDL_OutOfMemory();
118		return(NULL);
119	}
120	SDL_memset(overlay, 0, (sizeof *overlay));
121
122	/* Fill in the basic members */
123	overlay->format = format;
124	overlay->w = width;
125	overlay->h = height;
126
127	/* Set up the YUV surface function structure */
128	overlay->hwfuncs = &gs_yuvfuncs;
129	overlay->hw_overlay = 1;
130
131	/* Create the pixel data */
132	hwdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *hwdata);
133	overlay->hwdata = hwdata;
134	if ( hwdata == NULL ) {
135		SDL_FreeYUVOverlay(overlay);
136		SDL_OutOfMemory();
137		return(NULL);
138	}
139	hwdata->ipu_fd = -1;
140	hwdata->pixels = (Uint8 *)SDL_malloc(width*height*2);
141	if ( hwdata->pixels == NULL ) {
142		SDL_FreeYUVOverlay(overlay);
143		SDL_OutOfMemory();
144		return(NULL);
145	}
146	hwdata->macroblocks = (width/16) * (height/16);
147
148	/* Find the pitch and offset values for the overlay */
149	overlay->pitches = hwdata->pitches;
150	overlay->pixels = hwdata->planes;
151	switch (format) {
152	    case SDL_YV12_OVERLAY:
153	    case SDL_IYUV_OVERLAY:
154		overlay->pitches[0] = overlay->w;
155		overlay->pitches[1] = overlay->pitches[0] / 2;
156		overlay->pitches[2] = overlay->pitches[0] / 2;
157	        overlay->pixels[0] = hwdata->pixels;
158	        overlay->pixels[1] = overlay->pixels[0] +
159		                     overlay->pitches[0] * overlay->h;
160	        overlay->pixels[2] = overlay->pixels[1] +
161		                     overlay->pitches[1] * overlay->h / 2;
162		overlay->planes = 3;
163		break;
164	    default:
165		/* We should never get here (caught above) */
166		break;
167	}
168
169	/* Theoretically we could support several concurrent decode
170	   streams queueing up on the same file descriptor, but for
171	   simplicity we'll support only one.  Opening the IPU more
172	   than once will fail with EBUSY.
173	*/
174	hwdata->ipu_fd = open("/dev/ps2ipu", O_RDWR);
175	if ( hwdata->ipu_fd < 0 ) {
176		SDL_FreeYUVOverlay(overlay);
177		SDL_SetError("Playstation 2 IPU busy");
178		return(NULL);
179	}
180
181	/* Allocate a DMA area for pixel conversion */
182	bpp = this->screen->format->BytesPerPixel;
183	map_offset = (mapped_len + (sysconf(_SC_PAGESIZE) - 1)) & ~(sysconf(_SC_PAGESIZE) - 1);
184	hwdata->dma_len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8) +
185	                  width * height * bpp +
186	                  hwdata->macroblocks * (16 * sizeof(long long)) +
187	                  12 * sizeof(long long);
188	hwdata->dma_mem = mmap(0, hwdata->dma_len, PROT_READ|PROT_WRITE,
189	                       MAP_SHARED, memory_fd, map_offset);
190	if ( hwdata->dma_mem == MAP_FAILED ) {
191		hwdata->ipu_imem = (caddr_t)0;
192		SDL_FreeYUVOverlay(overlay);
193		SDL_SetError("Unable to map %d bytes for DMA", hwdata->dma_len);
194		return(NULL);
195	}
196	hwdata->ipu_imem = hwdata->dma_mem;
197	hwdata->ipu_omem = hwdata->ipu_imem +
198	                   hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
199	hwdata->dma_tags = hwdata->ipu_omem + width * height * bpp;
200
201	/* Allocate memory for the DMA packets */
202	hwdata->plist.num = hwdata->macroblocks * 4 + 1;
203	hwdata->plist.packet = (struct ps2_packet *)SDL_malloc(
204	                       hwdata->plist.num*sizeof(struct ps2_packet));
205	if ( ! hwdata->plist.packet ) {
206		SDL_FreeYUVOverlay(overlay);
207		SDL_OutOfMemory();
208		return(NULL);
209	}
210	pnum = 0;
211	packet = hwdata->plist.packet;
212
213	/* Set up the tags to send the image to the screen */
214	tags = (unsigned long long *)hwdata->dma_tags;
215	base = hwdata->ipu_omem;
216	fbp = screen_image.fbp;
217	fbw = screen_image.fbw;
218	psm = screen_image.psm;
219	y = screen_image.y + screen_image.h;	/* Offscreen video memory */
220	for ( h=height/16; h; --h ) {
221		x = 0;			/* Visible video memory */
222		for ( w=width/16; w; --w ) {
223			/* The head tag */
224			packet[pnum].ptr = &tags[0];
225			packet[pnum].len = 10 * sizeof(*tags);
226			++pnum;
227			tags[0] = 4 | (1LL << 60);	/* GIFtag */
228			tags[1] = 0x0e;			/* A+D */
229			tags[2] = ((unsigned long long)fbp << 32) |
230			          ((unsigned long long)fbw << 48) |
231			          ((unsigned long long)psm << 56);
232			tags[3] = PS2_GS_BITBLTBUF;
233			tags[4] = ((unsigned long long)x << 32) |
234			          ((unsigned long long)y << 48);
235			tags[5] = PS2_GS_TRXPOS;
236			tags[6] = (unsigned long long)16 |
237			          ((unsigned long long)16 << 32);
238			tags[7] = PS2_GS_TRXREG;
239			tags[8] = 0;
240			tags[9] = PS2_GS_TRXDIR;
241			/* Now the actual image data */
242			packet[pnum].ptr = &tags[10];
243			packet[pnum].len = 2 * sizeof(*tags);
244			++pnum;
245			tags[10] = ((16*16*bpp) >> 4) | (2LL << 58);
246			tags[11] = 0;
247			packet[pnum].ptr = (void *)base;
248			packet[pnum].len = 16 * 16 * bpp;
249			++pnum;
250			packet[pnum].ptr = &tags[12];
251			packet[pnum].len = 2 * sizeof(*tags);
252			++pnum;
253			tags[12] = (0 >> 4) | (1 << 15) | (2LL << 58);
254			tags[13] = 0;
255
256			tags += 16;
257			base += 16 * 16 * bpp;
258
259			x += 16;
260		}
261		y += 16;
262	}
263
264	/* Set up the texture memory area for the video */
265	tex_packet.ptr = tags;
266	tex_packet.len = 8 * sizeof(*tags);
267	tags[0] = 3 | (1LL << 60);	/* GIFtag */
268	tags[1] = 0x0e;			/* A+D */
269	tags[2] = ((screen_image.y + screen_image.h) * screen_image.w) / 64 +
270	          ((unsigned long long)fbw << 14) +
271	          ((unsigned long long)psm << 20) +
272	          ((unsigned long long)power_of_2(width) << 26) +
273	          ((unsigned long long)power_of_2(height) << 30) +
274	          ((unsigned long long)1 << 34) +
275	          ((unsigned long long)1 << 35);
276	tags[3] = PS2_GS_TEX0_1;
277	tags[4] = (1 << 5) + (1 << 6);
278	tags[5] = PS2_GS_TEX1_1;
279	tags[6] = 0;
280	tags[7] = PS2_GS_TEXFLUSH;
281	ioctl(console_fd, PS2IOC_SEND, &tex_packet);
282
283	/* Set up the tags for scaling the image */
284	packet[pnum].ptr = tags;
285	packet[pnum].len = 12 * sizeof(*tags);
286	++pnum;
287	tags[0] = 5 | (1LL << 60);	/* GIFtag */
288	tags[1] = 0x0e;			/* A+D */
289	tags[2] = 6 + (1 << 4) + (1 << 8);
290	tags[3] = PS2_GS_PRIM;
291	tags[4] = ((unsigned long long)0 * 16) +
292	           (((unsigned long long)0 * 16) << 16);
293	tags[5] = PS2_GS_UV;
294	tags[6] = 0; /* X1, Y1 */
295	tags[7] = PS2_GS_XYZ2;
296	hwdata->stretch_x1y1 = &tags[6];
297	tags[8] = ((unsigned long long)overlay->w * 16) +
298	           (((unsigned long long)overlay->h * 16) << 16);
299	tags[9] = PS2_GS_UV;
300	tags[10] = 0; /* X2, Y2 */
301	tags[11] = PS2_GS_XYZ2;
302	hwdata->stretch_x2y2 = &tags[10];
303
304	/* We're all done.. */
305	return(overlay);
306}
307
308int GS_LockYUVOverlay(_THIS, SDL_Overlay *overlay)
309{
310	return(0);
311}
312
313void GS_UnlockYUVOverlay(_THIS, SDL_Overlay *overlay)
314{
315	return;
316}
317
318int GS_DisplayYUVOverlay(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
319{
320	struct private_yuvhwdata *hwdata;
321	__u32 cmd;
322	struct ps2_packet packet;
323	int h, w, i;
324	Uint32 *lum, *Cr, *Cb;
325	int lum_pitch;
326	int crb_pitch;
327	Uint32 *lum_src, *Cr_src, *Cb_src;
328	Uint32 *srcp, *dstp;
329	unsigned int x, y;
330	SDL_Surface *screen;
331
332	/* Find out where the various portions of the image are */
333	hwdata = overlay->hwdata;
334	switch (overlay->format) {
335	    case SDL_YV12_OVERLAY:
336		lum = (Uint32 *)overlay->pixels[0];
337		Cr =  (Uint32 *)overlay->pixels[1];
338		Cb =  (Uint32 *)overlay->pixels[2];
339		break;
340	    case SDL_IYUV_OVERLAY:
341		lum = (Uint32 *)overlay->pixels[0];
342		Cr =  (Uint32 *)overlay->pixels[2];
343		Cb =  (Uint32 *)overlay->pixels[1];
344	    default:
345		SDL_SetError("Unsupported YUV format in blit (?)");
346		return(-1);
347	}
348	dstp = (Uint32 *)hwdata->ipu_imem;
349	lum_pitch = overlay->w/4;
350	crb_pitch = (overlay->w/2)/4;
351
352	/* Copy blocks of 16x16 pixels to the DMA area */
353	for ( h=overlay->h/16; h; --h ) {
354		lum_src = lum;
355		Cr_src = Cr;
356		Cb_src = Cb;
357		for ( w=overlay->w/16; w; --w ) {
358			srcp = lum_src;
359			for ( i=0; i<16; ++i ) {
360				dstp[0] = srcp[0];
361				dstp[1] = srcp[1];
362				dstp[2] = srcp[2];
363				dstp[3] = srcp[3];
364				srcp += lum_pitch;
365				dstp += 4;
366			}
367			srcp = Cb_src;
368			for ( i=0; i<8; ++i ) {
369				dstp[0] = srcp[0];
370				dstp[1] = srcp[1];
371				srcp += crb_pitch;
372				dstp += 2;
373			}
374			srcp = Cr_src;
375			for ( i=0; i<8; ++i ) {
376				dstp[0] = srcp[0];
377				dstp[1] = srcp[1];
378				srcp += crb_pitch;
379				dstp += 2;
380			}
381			lum_src += 16 / 4;
382			Cb_src += 8 / 4;
383			Cr_src += 8 / 4;
384		}
385		lum += lum_pitch * 16;
386		Cr += crb_pitch * 8;
387		Cb += crb_pitch * 8;
388	}
389
390	/* Send the macroblock data to the IPU */
391#ifdef DEBUG_YUV
392	fprintf(stderr, "Sending data to IPU..\n");
393#endif
394	packet.ptr = hwdata->ipu_imem;
395	packet.len = hwdata->macroblocks * (16 * 16 + 8 * 8 + 8 * 8);
396	ioctl(hwdata->ipu_fd, PS2IOC_SENDA, &packet);
397
398	/* Trigger the DMA to the IPU for conversion */
399#ifdef DEBUG_YUV
400	fprintf(stderr, "Trigging conversion command\n");
401#endif
402	cmd = (7 << 28) + hwdata->macroblocks;
403	if ( screen_image.psm == PS2_GS_PSMCT16 ) {
404		cmd += (1 << 27) +	/* Output RGB 555 */
405		       (1 << 26);	/* Dither output */
406	}
407	ioctl(hwdata->ipu_fd, PS2IOC_SIPUCMD, &cmd);
408
409	/* Retrieve the converted image from the IPU */
410#ifdef DEBUG_YUV
411	fprintf(stderr, "Retrieving data from IPU..\n");
412#endif
413	packet.ptr = hwdata->ipu_omem;
414	packet.len = overlay->w * overlay->h *
415	             this->screen->format->BytesPerPixel;
416	ioctl(hwdata->ipu_fd, PS2IOC_RECV, &packet);
417
418#ifdef DEBUG_YUV
419	fprintf(stderr, "Copying image to screen..\n");
420#endif
421	/* Wait for previous DMA to complete */
422	ioctl(console_fd, PS2IOC_SENDQCT, 1);
423
424	/* Send the current image to the screen and scale it */
425	screen = this->screen;
426	x = (unsigned int)dst->x;
427	y = (unsigned int)dst->y;
428	if ( screen->offset ) {
429		x += (screen->offset % screen->pitch) /
430		     screen->format->BytesPerPixel;
431		y += (screen->offset / screen->pitch);
432	}
433	y += screen_image.y;
434	*hwdata->stretch_x1y1 = (x * 16) + ((y * 16) << 16);
435	x += (unsigned int)dst->w;
436	y += (unsigned int)dst->h;
437	*hwdata->stretch_x2y2 = (x * 16) + ((y * 16) << 16);
438	return ioctl(console_fd, PS2IOC_SENDL, &hwdata->plist);
439}
440
441void GS_FreeYUVOverlay(_THIS, SDL_Overlay *overlay)
442{
443	struct private_yuvhwdata *hwdata;
444
445	hwdata = overlay->hwdata;
446	if ( hwdata ) {
447		if ( hwdata->ipu_fd >= 0 ) {
448			close(hwdata->ipu_fd);
449		}
450		if ( hwdata->dma_mem ) {
451			munmap(hwdata->dma_mem, hwdata->dma_len);
452		}
453		if ( hwdata->plist.packet ) {
454			SDL_free(hwdata->plist.packet);
455		}
456		if ( hwdata->pixels ) {
457			SDL_free(hwdata->pixels);
458		}
459		SDL_free(hwdata);
460	}
461}
462