radeon_ioctl.c revision 99ef0a03292e7dc6aa2465aaaa620f394d2c286b
/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */
/**************************************************************************

Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include <sched.h>
#include <errno.h>

#include "glheader.h"
#include "imports.h"
#include "simple_list.h"
#include "swrast/swrast.h"

#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
#include "radeon_sanity.h"

#include "radeon_macros.h"  /* for INREG() */

#include "vblank.h"

#define RADEON_TIMEOUT             512
#define RADEON_IDLE_RETRY           16


static void radeonWaitForIdle( radeonContextPtr rmesa );

/* =============================================================
 * Kernel command buffer handling
 */
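
/* Commands accumulate in rmesa->store.cmd_buf as a stream of
 * drmRadeonCmdHeader unions (plus payload dwords) and are submitted
 * to the kernel in a single DRM_RADEON_CMDBUF ioctl by
 * radeonFlushCmdBufLocked() below.  A minimal sketch of the emission
 * pattern used throughout this file; the authoritative header layout
 * lives in the shared drm headers:
 *
 *    drmRadeonCmdHeader *cmd = (drmRadeonCmdHeader *)
 *       radeonAllocCmdBuf( rmesa, sizeof(*cmd), __FUNCTION__ );
 *    cmd->wait.cmd_type = RADEON_CMD_WAIT;    -- select the sub-command
 *    cmd->wait.flags    = RADEON_WAIT_3D;     -- sub-command payload
 */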

static void print_state_atom( struct radeon_state_atom *state )
{
   int i;

   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);

   if (RADEON_DEBUG & DEBUG_VERBOSE)
      for (i = 0 ; i < state->cmd_size ; i++)
	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);

}

static void radeon_emit_state_list( radeonContextPtr rmesa,
				    struct radeon_state_atom *list )
{
   struct radeon_state_atom *state, *tmp;
   char *dest;

   /* From Felix Kuhling: similar to some other lockups, glaxium will
    * lock with what we believe to be a normal command stream, but
    * sprinkling some magic waits around allows it to run
    * uninterrupted.  This has a slight effect on q3 framerates, but
    * it might now be possible to remove the zbs hack, below.
    *
    * Felix reports that this can be narrowed down to just
    * tcl,tex0,tex1 state, but that's pretty much every statechange,
    * so let's just put the wait in always (unless Felix wants to
    * narrow it down further...)
    */
   if (1) {
      drmRadeonCmdHeader *cmd;
      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
						     __FUNCTION__ );
      cmd->wait.cmd_type = RADEON_CMD_WAIT;
      cmd->wait.flags = RADEON_WAIT_3D;
   }

   foreach_s( state, tmp, list ) {
      if (state->check( rmesa->glCtx )) {
	 dest = radeonAllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__);
	 memcpy( dest, state->cmd, state->cmd_size * 4);
	 move_to_head( &(rmesa->hw.clean), state );
	 if (RADEON_DEBUG & DEBUG_STATE)
	    print_state_atom( state );
      }
      else if (RADEON_DEBUG & DEBUG_STATE)
	 fprintf(stderr, "skip state %s\n", state->name);
   }
}


void radeonEmitState( radeonContextPtr rmesa )
{
   struct radeon_state_atom *state, *tmp;

   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
      fprintf(stderr, "%s\n", __FUNCTION__);

   /* Somewhat overkill:
    */
   if (rmesa->lost_context) {
      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
	 fprintf(stderr, "%s - lost context\n", __FUNCTION__);

      foreach_s( state, tmp, &(rmesa->hw.clean) )
	 move_to_tail(&(rmesa->hw.dirty), state );

      rmesa->lost_context = 0;
   }
   else if (1) {
      /* This is a dastardly kludge to work around a lockup that I
       * haven't otherwise figured out.
       */
      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
   }

   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
     foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
       if (state->is_tcl) {
	 move_to_head( &(rmesa->hw.clean), state );
       }
     }
   }

   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
}
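
/* State updates follow a dirty/clean list protocol: a state module
 * moves an atom onto rmesa->hw.dirty when it touches the atom's
 * command dwords, and radeonEmitState() copies every dirty atom whose
 * check() hook approves into the command buffer, returning it to
 * rmesa->hw.clean.  A hedged sketch of what a caller elsewhere in the
 * driver does (RADEON_STATECHANGE is the macro actually used, e.g. in
 * radeonPageFlip() below; "ctx" is just one of the driver's atoms):
 *
 *    RADEON_STATECHANGE( rmesa, ctx );             -- mark atom dirty
 *    rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ...; -- edit its dwords
 *    radeonEmitState( rmesa );                     -- stream it out
 */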



/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
				GLuint vertex_format,
				GLuint primitive,
				GLuint vertex_nr )
{
   drmRadeonCmdHeader *cmd;


   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
	      rmesa->store.cmd_used/4);

#if RADEON_OLD_PACKETS
   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 6 * sizeof(*cmd),
						  __FUNCTION__ );
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = vertex_nr;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
	      __FUNCTION__,
	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
#else
   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 4 * sizeof(*cmd),
						  __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));


   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
	      __FUNCTION__,
	      cmd[1].i, cmd[2].i, cmd[3].i);
#endif
}
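
/* A hedged sketch of how the tcl paths drive this entry point; the
 * call below is illustrative only (actual callers live in
 * radeon_tcl.c, and the primitive/format values shown are
 * assumptions, built from the RADEON_CP_VC_* bits):
 *
 *    radeonEmitVertexAOS( rmesa, vertsize, region_start_offset );
 *    radeonEmitVbufPrim( rmesa,
 *                        vertex_format,
 *                        hw_primitive,      -- e.g. a TRI_LIST prim type
 *                        nr_vertices );
 */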


void radeonFlushElts( radeonContextPtr rmesa )
{
   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
   int dwords;
#if RADEON_OLD_PACKETS
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
#else
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
#endif

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   assert( rmesa->dma.flush == radeonFlushElts );
   rmesa->dma.flush = 0;

   /* Cope with odd number of elts:
    */
   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;

#if RADEON_OLD_PACKETS
   cmd[1] |= (dwords - 3) << 16;
   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#else
   cmd[1] |= (dwords - 3) << 16;
   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#endif
}
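
/* Worked example of the backpatching above, assuming RADEON_OLD_PACKETS
 * and three indices written after the 24-byte packet header emitted by
 * radeonAllocEltsOpenEnded():
 *
 *    cmd_used - (elts_start + 24) = 6 bytes  ->  nr = 3 elts
 *
 * cmd_used is then rounded up to a dword boundary (the "+2 & ~2"
 * trick works because cmd_used is always even here), so an odd
 * trailing GLushort is padded out before the dword count (dwords - 3)
 * and the element count are patched into the stored packet header.
 */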


GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
				    GLuint vertex_format,
				    GLuint primitive,
				    GLuint min_nr )
{
   drmRadeonCmdHeader *cmd;
   GLushort *retval;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);

   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

#if RADEON_OLD_PACKETS
   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa,
						  24 + min_nr*2,
						  __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = 0xffff;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+6);
#else
   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa,
						  16 + min_nr*2,
						  __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+4);
#endif

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
	      __FUNCTION__,
	      cmd[1].i, vertex_format, primitive);

   assert(!rmesa->dma.flush);
   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->dma.flush = radeonFlushElts;

   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;

   return retval;
}
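
/* A hedged sketch of the open-ended protocol: the caller reserves
 * room for at least min_nr indices, writes as many as it likes, and
 * the length/count fields of the packet stay unfixed until
 * radeonFlushElts() (installed as rmesa->dma.flush above) backpatches
 * them.  Illustrative only; the real callers are the elt paths in
 * radeon_tcl.c:
 *
 *    GLushort *dest = radeonAllocEltsOpenEnded( rmesa, fmt, prim, nr );
 *    for (i = 0; i < nr; i++)
 *       dest[i] = indices[i];
 *    if (rmesa->dma.flush)
 *       rmesa->dma.flush( rmesa );   -- patches dword and elt counts
 */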



void radeonEmitVertexAOS( radeonContextPtr rmesa,
			  GLuint vertex_size,
			  GLuint offset )
{
#if RADEON_OLD_PACKETS
   rmesa->ioctl.vertex_size = vertex_size;
   rmesa->ioctl.vertex_offset = offset;
#else
   drmRadeonCmdHeader *cmd;

   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
	      __FUNCTION__, vertex_size, offset);

   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 5 * sizeof(int),
						  __FUNCTION__ );

   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
   cmd[2].i = 1;
   cmd[3].i = vertex_size | (vertex_size << 8);
   cmd[4].i = offset;
#endif
}


void radeonEmitAOS( radeonContextPtr rmesa,
		    struct radeon_dma_region **component,
		    GLuint nr,
		    GLuint offset )
{
#if RADEON_OLD_PACKETS
   assert( nr == 1 );
   assert( component[0]->aos_size == component[0]->aos_stride );
   rmesa->ioctl.vertex_size = component[0]->aos_size;
   rmesa->ioctl.vertex_offset =
      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
#else
   drmRadeonCmdHeader *cmd;
   int sz = 3 + (nr/2 * 3) + (nr & 1) * 2;
   int i;
   int *tmp;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);


   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sz * sizeof(int),
						  __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | ((sz-3) << 16);
   cmd[2].i = nr;
   tmp = &cmd[0].i;
   cmd += 3;

   for (i = 0 ; i < nr ; i++) {
      if (i & 1) {
	 cmd[0].i |= ((component[i]->aos_stride << 24) |
		      (component[i]->aos_size << 16));
	 cmd[2].i = (component[i]->aos_start +
		     offset * component[i]->aos_stride * 4);
	 cmd += 3;
      }
      else {
	 cmd[0].i = ((component[i]->aos_stride << 8) |
		     (component[i]->aos_size << 0));
	 cmd[1].i = (component[i]->aos_start +
		     offset * component[i]->aos_stride * 4);
      }
   }

   if (RADEON_DEBUG & DEBUG_VERTS) {
      fprintf(stderr, "%s:\n", __FUNCTION__);
      for (i = 0 ; i < sz ; i++)
	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
   }
#endif
}
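
/* The loop above packs two arrays per three dwords, which is the
 * LOAD_VBPNTR layout: each pair shares one size/stride descriptor
 * dword followed by a base-address dword per array.  Sketch of the
 * packet body for nr == 2 (dword indices relative to cmd after the
 * three-dword header; values shown symbolically):
 *
 *    [0] = size0 | (stride0 << 8) | (size1 << 16) | (stride1 << 24)
 *    [1] = address of array 0
 *    [2] = address of array 1
 *
 * An odd trailing array occupies two dwords on its own, hence
 * sz = 3 + (nr/2)*3 + (nr&1)*2.
 */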

/* using already shifted color_fmt! */
void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
		   GLuint color_fmt,
		   GLuint src_pitch,
		   GLuint src_offset,
		   GLuint dst_pitch,
		   GLuint dst_offset,
		   GLint srcx, GLint srcy,
		   GLint dstx, GLint dsty,
		   GLuint w, GLuint h )
{
   drmRadeonCmdHeader *cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
	      __FUNCTION__,
	      src_pitch, src_offset, srcx, srcy,
	      dst_pitch, dst_offset, dstx, dsty,
	      w, h);

   assert( (src_pitch & 63) == 0 );
   assert( (dst_pitch & 63) == 0 );
   assert( (src_offset & 1023) == 0 );
   assert( (dst_offset & 1023) == 0 );
   assert( w < (1<<16) );
   assert( h < (1<<16) );

   cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
						  __FUNCTION__ );


   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
	       RADEON_GMC_BRUSH_NONE |
	       color_fmt |
	       RADEON_GMC_SRC_DATATYPE_COLOR |
	       RADEON_ROP3_S |
	       RADEON_DP_SRC_SOURCE_MEMORY |
	       RADEON_GMC_CLR_CMP_CNTL_DIS |
	       RADEON_GMC_WR_MSK_DIS );

   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
   cmd[5].i = (srcx << 16) | srcy;
   cmd[6].i = (dstx << 16) | dsty; /* dst */
   cmd[7].i = (w << 16) | h;
}
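
/* The pitch/offset asserts above mirror the encoding used in
 * cmd[3]/cmd[4]: pitch is stored in units of 64 bytes in the upper
 * bits and offset in units of 1024 bytes in the lower bits, so both
 * must be multiples of those units.  Worked example, assuming a
 * 1024-pixel-wide 16bpp surface at byte offset 0x100000:
 *
 *    pitch  = 1024 * 2 = 2048 bytes  ->  2048/64       = 32
 *    offset = 0x100000               ->  0x100000 >> 10 = 0x400
 *    cmd[3].i = (32 << 22) | 0x400
 */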


void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
{
   if (rmesa->dri.drmMinor >= 6) {
      drmRadeonCmdHeader *cmd;

      assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );

      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
						   __FUNCTION__ );
      cmd[0].i = 0;
      cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
      cmd[0].wait.flags = flags;
   }
}


static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
				    const char * caller )
{
   int ret, i;
   drmRadeonCmdBuffer cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL) {
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

      if (RADEON_DEBUG & DEBUG_VERBOSE)
	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
	    fprintf(stderr, "%d: %x\n", i/4,
		    *(int *)(&rmesa->store.cmd_buf[i]));
   }

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
	      rmesa->dma.nr_released_bufs);


   if (RADEON_DEBUG & DEBUG_SANITY) {
      if (rmesa->state.scissor.enabled)
	 ret = radeonSanityCmdBuffer( rmesa,
				      rmesa->state.scissor.numClipRects,
				      rmesa->state.scissor.pClipRects);
      else
	 ret = radeonSanityCmdBuffer( rmesa,
				      rmesa->numClipRects,
				      rmesa->pClipRects);
      if (ret) {
	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
	 goto out;
      }
   }


   cmd.bufsz = rmesa->store.cmd_used;
   cmd.buf = rmesa->store.cmd_buf;

   if (rmesa->state.scissor.enabled) {
      cmd.nbox = rmesa->state.scissor.numClipRects;
      cmd.boxes = (drmClipRect *)rmesa->state.scissor.pClipRects;
   } else {
      cmd.nbox = rmesa->numClipRects;
      cmd.boxes = (drmClipRect *)rmesa->pClipRects;
   }

   ret = drmCommandWrite( rmesa->dri.fd,
			  DRM_RADEON_CMDBUF,
			  &cmd, sizeof(cmd) );

   if (ret)
      fprintf(stderr, "drmCommandWrite: %d\n", ret);

 out:
   rmesa->store.primnr = 0;
   rmesa->store.statenr = 0;
   rmesa->store.cmd_used = 0;
   rmesa->dma.nr_released_bufs = 0;
   rmesa->lost_context = 1;
   return ret;
}


/* Note: does not emit any commands to avoid recursion on
 * radeonAllocCmdBuf.
 */
void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
{
   int ret;


   LOCK_HARDWARE( rmesa );

   ret = radeonFlushCmdBufLocked( rmesa, caller );

   UNLOCK_HARDWARE( rmesa );

   if (ret) {
      fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret);
      exit(ret);
   }
}

/* =============================================================
 * Hardware vertex buffer handling
 */


void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
{
   struct radeon_dma_buffer *dmabuf;
   int fd = rmesa->dri.fd;
   int index = 0;
   int size = 0;
   drmDMAReq dma;
   int ret;

   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush) {
      rmesa->dma.flush( rmesa );
   }

   if (rmesa->dma.current.buf)
      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );

   if (rmesa->dma.nr_released_bufs > 4)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );

   dma.context = rmesa->dri.hwContext;
   dma.send_count = 0;
   dma.send_list = NULL;
   dma.send_sizes = NULL;
   dma.flags = 0;
   dma.request_count = 1;
   dma.request_size = RADEON_BUFFER_SIZE;
   dma.request_list = &index;
   dma.request_sizes = &size;
   dma.granted_count = 0;

   LOCK_HARDWARE(rmesa);	/* no need to validate */

   ret = drmDMA( fd, &dma );

   if (ret != 0) {
      /* Free some up this way?
       */
      if (rmesa->dma.nr_released_bufs) {
	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
      }

      if (RADEON_DEBUG & DEBUG_DMA)
	 fprintf(stderr, "Waiting for buffers\n");

      radeonWaitForIdleLocked( rmesa );
      ret = drmDMA( fd, &dma );

      if ( ret != 0 ) {
	 UNLOCK_HARDWARE( rmesa );
	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
	 exit( -1 );
      }
   }

   UNLOCK_HARDWARE(rmesa);

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "Allocated buffer %d\n", index);

   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
   dmabuf->refcount = 1;

   rmesa->dma.current.buf = dmabuf;
   rmesa->dma.current.address = dmabuf->buf->address;
   rmesa->dma.current.end = dmabuf->buf->total;
   rmesa->dma.current.start = 0;
   rmesa->dma.current.ptr = 0;

   rmesa->c_vertexBuffers++;
}

void radeonReleaseDmaRegion( radeonContextPtr rmesa,
			     struct radeon_dma_region *region,
			     const char *caller )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

   if (!region->buf)
      return;

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (--region->buf->refcount == 0) {
      drmRadeonCmdHeader *cmd;

      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
		 region->buf->buf->idx);

      cmd = (drmRadeonCmdHeader *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
						     __FUNCTION__ );
      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
      cmd->dma.buf_idx = region->buf->buf->idx;
      FREE(region->buf);
      rmesa->dma.nr_released_bufs++;
   }

   region->buf = 0;
   region->start = 0;
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion( radeonContextPtr rmesa,
			   struct radeon_dma_region *region,
			   int bytes,
			   int alignment )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (region->buf)
      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );

   alignment--;
   rmesa->dma.current.start = rmesa->dma.current.ptr =
      (rmesa->dma.current.ptr + alignment) & ~alignment;

   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
      radeonRefillCurrentDmaRegion( rmesa );

   region->start = rmesa->dma.current.start;
   region->ptr = rmesa->dma.current.start;
   region->end = rmesa->dma.current.start + bytes;
   region->address = rmesa->dma.current.address;
   region->buf = rmesa->dma.current.buf;
   region->buf->refcount++;

   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
   rmesa->dma.current.start =
      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
}

void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
				struct radeon_dma_region *region,
				int numverts,
				int vertsize,
				int alignment )
{
   radeonAllocDmaRegion( rmesa, region, vertsize * numverts, alignment );
}
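
/* A hedged sketch of the region lifecycle these helpers implement;
 * illustrative only, real users are the vertex and texture upload
 * paths.  A region borrows a refcounted slice of the current DMA
 * buffer, the caller writes through region->address, and releasing
 * the region queues a RADEON_CMD_DMA_DISCARD once the refcount
 * reaches zero:
 *
 *    struct radeon_dma_region region = {0};
 *    radeonAllocDmaRegion( rmesa, &region, nbytes, 4 );
 *    memcpy( region.address + region.start, data, nbytes );
 *    ...emit packets referencing the region...
 *    radeonReleaseDmaRegion( rmesa, &region, __FUNCTION__ );
 */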

/* ================================================================
 * SwapBuffers with client-side throttling
 */

static CARD32 radeonGetLastFrame (radeonContextPtr rmesa)
{
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   int ret;
   CARD32 frame;

   if (rmesa->dri.screen->drmMinor >= 4) {
      drmRadeonGetParam gp;

      gp.param = RADEON_PARAM_LAST_FRAME;
      gp.value = (int *)&frame;
      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
				 &gp, sizeof(gp) );
   }
   else
      ret = -EINVAL;

#ifndef __alpha__
   if ( ret == -EINVAL ) {
      frame = INREG( RADEON_LAST_FRAME_REG );
      ret = 0;
   }
#endif
   if ( ret ) {
      fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
      exit(1);
   }

   return frame;
}

static void radeonEmitIrqLocked( radeonContextPtr rmesa )
{
   drmRadeonIrqEmit ie;
   int ret;

   ie.irq_seq = &rmesa->iw.irq_seq;
   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
			      &ie, sizeof(ie) );
   if ( ret ) {
      fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}


static void radeonWaitIrq( radeonContextPtr rmesa )
{
   int ret;

   do {
      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
			     &rmesa->iw, sizeof(rmesa->iw) );
   } while (ret && (errno == EINTR || errno == EAGAIN));

   if ( ret ) {
      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}


static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
{
   RADEONSAREAPrivPtr sarea = rmesa->sarea;

   if (rmesa->do_irqs) {
      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
	 if (!rmesa->irqsEmitted) {
	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
	       ;
	 }
	 else {
	    UNLOCK_HARDWARE( rmesa );
	    radeonWaitIrq( rmesa );
	    LOCK_HARDWARE( rmesa );
	 }
	 rmesa->irqsEmitted = 10;
      }

      if (rmesa->irqsEmitted) {
	 radeonEmitIrqLocked( rmesa );
	 rmesa->irqsEmitted--;
      }
   }
   else {
      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
	 UNLOCK_HARDWARE( rmesa );
	 if (rmesa->do_usleeps)
	    DO_USLEEP( 1 );
	 LOCK_HARDWARE( rmesa );
      }
   }
}

/* Copy the back color buffer to the front color buffer.
 */
void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint nbox, i, ret;
   GLboolean   missed_target;
   int64_t     ust;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, rmesa->glCtx );
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Throttle the frame rate -- only allow one pending swap buffers
    * request at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   LOCK_HARDWARE( rmesa );

   nbox = dPriv->numClipRects; /* must be in locked region */

   for ( i = 0 ; i < nbox ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
      XF86DRIClipRectPtr box = dPriv->pClipRects;
      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
      GLint n = 0;

      for ( ; i < nr ; i++ ) {
	 *b++ = box[i];
	 n++;
      }
      rmesa->sarea->nbox = n;

      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );

      if ( ret ) {
	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
	 UNLOCK_HARDWARE( rmesa );
	 exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
   rmesa->swap_count++;
   (*rmesa->get_ust)( & ust );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      rmesa->swap_missed_ust = ust - rmesa->swap_ust;
   }

   rmesa->swap_ust = ust;
}

void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint ret;
   GLboolean   missed_target;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
	      rmesa->sarea->pfCurrentPage);
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Need to do this for the perf box placement:
    */
   if (dPriv->numClipRects)
   {
      XF86DRIClipRectPtr box = dPriv->pClipRects;
      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
      b[0] = box[0];
      rmesa->sarea->nbox = 1;
   }

   /* Throttle the frame rate -- only allow a few pending swap buffers
    * requests at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      (void) (*rmesa->get_ust)( & rmesa->swap_missed_ust );
   }
   LOCK_HARDWARE( rmesa );

   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );

   UNLOCK_HARDWARE( rmesa );

   if ( ret ) {
      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
      exit( 1 );
   }

   rmesa->swap_count++;
   (void) (*rmesa->get_ust)( & rmesa->swap_ust );

   if ( rmesa->sarea->pfCurrentPage == 1 ) {
	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
   } else {
	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
   }

   RADEON_STATECHANGE( rmesa, ctx );
   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
					   + rmesa->radeonScreen->fbLocation;
   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
}


/* ================================================================
 * Buffer clear
 */
#define RADEON_MAX_CLEARS	256

static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
			 GLint cx, GLint cy, GLint cw, GLint ch )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   RADEONSAREAPrivPtr sarea = rmesa->sarea;
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   CARD32 clear;
   GLuint flags = 0;
   GLuint color_mask = 0;
   GLint ret, i;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
	       __FUNCTION__, all, cx, cy, cw, ch );
   }

   radeonEmitState( rmesa );

   /* Need to cope with lost context here as the kernel relies on
    * some residual state:
    */
   RADEON_FIREVERTICES( rmesa );

   if ( mask & DD_FRONT_LEFT_BIT ) {
      flags |= RADEON_FRONT;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_FRONT_LEFT_BIT;
   }

   if ( mask & DD_BACK_LEFT_BIT ) {
      flags |= RADEON_BACK;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_BACK_LEFT_BIT;
   }

   if ( mask & DD_DEPTH_BIT ) {
      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
      mask &= ~DD_DEPTH_BIT;
   }

   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
      flags |= RADEON_STENCIL;
      mask &= ~DD_STENCIL_BIT;
   }

   if ( mask ) {
      if (RADEON_DEBUG & DEBUG_FALLBACKS)
	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
   }

   if ( !flags )
      return;


   /* Flip top to bottom */
   cx += dPriv->x;
   cy  = dPriv->y + dPriv->h - cy - ch;

   LOCK_HARDWARE( rmesa );

   /* Throttle the number of clear ioctls we do.
    */
   while ( 1 ) {
      int ret;

      if (rmesa->dri.screen->drmMinor >= 4) {
	drmRadeonGetParam gp;

	gp.param = RADEON_PARAM_LAST_CLEAR;
	gp.value = (int *)&clear;
	ret = drmCommandWriteRead( rmesa->dri.fd,
				   DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
      } else
	ret = -EINVAL;

#ifndef __alpha__
      if ( ret == -EINVAL ) {
	 clear = INREG( RADEON_LAST_CLEAR_REG );
	 ret = 0;
      }
#endif
      if ( ret ) {
	 fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
	 exit(1);
      }
      if ( RADEON_DEBUG & DEBUG_IOCTL ) {
	 fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear );
	 if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" );
      }

      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
	 break;
      }

      if ( rmesa->do_usleeps ) {
	 UNLOCK_HARDWARE( rmesa );
	 DO_USLEEP( 1 );
	 LOCK_HARDWARE( rmesa );
      }
   }

   for ( i = 0 ; i < dPriv->numClipRects ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
      XF86DRIClipRectPtr box = dPriv->pClipRects;
      XF86DRIClipRectPtr b = rmesa->sarea->boxes;
      drmRadeonClearType clear;
      drmRadeonClearRect depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
      GLint n = 0;

      if ( !all ) {
	 for ( ; i < nr ; i++ ) {
	    GLint x = box[i].x1;
	    GLint y = box[i].y1;
	    GLint w = box[i].x2 - x;
	    GLint h = box[i].y2 - y;

	    if ( x < cx ) w -= cx - x, x = cx;
	    if ( y < cy ) h -= cy - y, y = cy;
	    if ( x + w > cx + cw ) w = cx + cw - x;
	    if ( y + h > cy + ch ) h = cy + ch - y;
	    if ( w <= 0 ) continue;
	    if ( h <= 0 ) continue;

	    b->x1 = x;
	    b->y1 = y;
	    b->x2 = x + w;
	    b->y2 = y + h;
	    b++;
	    n++;
	 }
      } else {
	 for ( ; i < nr ; i++ ) {
	    *b++ = box[i];
	    n++;
	 }
      }

      rmesa->sarea->nbox = n;

      clear.flags       = flags;
      clear.clear_color = rmesa->state.color.clear;
      clear.clear_depth = rmesa->state.depth.clear;
      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      clear.depth_mask  = rmesa->state.stencil.clear;
      clear.depth_boxes = depth_boxes;

      n--;
      b = rmesa->sarea->boxes;
      for ( ; n >= 0 ; n-- ) {
	 depth_boxes[n].f[RADEON_CLEAR_X1] = (float)b[n].x1;
	 depth_boxes[n].f[RADEON_CLEAR_Y1] = (float)b[n].y1;
	 depth_boxes[n].f[RADEON_CLEAR_X2] = (float)b[n].x2;
	 depth_boxes[n].f[RADEON_CLEAR_Y2] = (float)b[n].y2;
	 depth_boxes[n].f[RADEON_CLEAR_DEPTH] =
	    (float)rmesa->state.depth.clear;
      }

      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
			     &clear, sizeof(drmRadeonClearType));

      if ( ret ) {
	 UNLOCK_HARDWARE( rmesa );
	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
	 exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
}


void radeonWaitForIdleLocked( radeonContextPtr rmesa )
{
    int fd = rmesa->dri.fd;
    int to = 0;
    int ret, i = 0;

    rmesa->c_drawWaits++;

    do {
        do {
            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );

    if ( ret < 0 ) {
	UNLOCK_HARDWARE( rmesa );
	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
	exit( -1 );
    }
}


static void radeonWaitForIdle( radeonContextPtr rmesa )
{
   LOCK_HARDWARE(rmesa);
   radeonWaitForIdleLocked( rmesa );
   UNLOCK_HARDWARE(rmesa);
}


void radeonFlush( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (!is_empty_list(&rmesa->hw.dirty))
      radeonEmitState( rmesa );

   if (rmesa->store.cmd_used)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
}

/* Make sure all commands have been sent to the hardware and have
 * completed processing.
 */
void radeonFinish( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   radeonFlush( ctx );

   if (rmesa->do_irqs) {
      LOCK_HARDWARE( rmesa );
      radeonEmitIrqLocked( rmesa );
      UNLOCK_HARDWARE( rmesa );
      radeonWaitIrq( rmesa );
   }
   else
      radeonWaitForIdle( rmesa );
}


void radeonInitIoctlFuncs( GLcontext *ctx )
{
    ctx->Driver.Clear = radeonClear;
    ctx->Driver.Finish = radeonFinish;
    ctx->Driver.Flush = radeonFlush;
}