radeon_ioctl.c revision 6f3cc6a5226fd4b5d44cca91e2f76216ecaff831
/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c,v 1.11 2003/01/29 22:04:59 dawes Exp $ */
/**************************************************************************

Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 */

#include <sched.h>
#include <errno.h>

#include "glheader.h"
#include "imports.h"
#include "simple_list.h"
#include "swrast/swrast.h"

#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
#include "radeon_sanity.h"

#define STANDALONE_MMIO
#include "radeon_macros.h"  /* for INREG() */

#include "vblank.h"

#define RADEON_TIMEOUT             512
#define RADEON_IDLE_RETRY           16


static void radeonWaitForIdle( radeonContextPtr rmesa );

/* =============================================================
 * Kernel command buffer handling
 */
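
/* All rendering work is batched into a user-space command buffer
 * (rmesa->store.cmd_buf) and handed to the kernel in one piece via the
 * DRM_RADEON_CMDBUF ioctl.  Each entry in the buffer begins with a
 * drm_radeon_cmd_header_t that tags it as a state packet, a type-3
 * packet (optionally replayed per cliprect by the kernel), a wait, or
 * a DMA buffer discard; the kernel parses these headers as it copies
 * the buffer to the ring.
 */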

static void print_state_atom( struct radeon_state_atom *state )
{
   int i;

   fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size);

   if (RADEON_DEBUG & DEBUG_VERBOSE)
      for (i = 0 ; i < state->cmd_size ; i++)
	 fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);

}

static void radeon_emit_state_list( radeonContextPtr rmesa,
				    struct radeon_state_atom *list )
{
   struct radeon_state_atom *state, *tmp;
   char *dest;
   int i, size, texunits;

   /* It appears that some permutations of state atoms lock up the
    * chip.  Therefore we make sure that state atoms are emitted in a
    * fixed order. First mark all dirty state atoms and then go
    * through all state atoms in a well defined order and emit only
    * the marked ones.
    * FIXME: This requires knowledge of which state atoms exist.
    * FIXME: Is the zbs hack below still needed?
    */
   size = 0;
   foreach_s( state, tmp, list ) {
      if (state->check( rmesa->glCtx )) {
	 size += state->cmd_size;
	 state->dirty = GL_TRUE;
	 move_to_head( &(rmesa->hw.clean), state );
	 if (RADEON_DEBUG & DEBUG_STATE)
	    print_state_atom( state );
      }
      else if (RADEON_DEBUG & DEBUG_STATE)
	 fprintf(stderr, "skip state %s\n", state->name);
   }
   /* short cut */
   if (!size)
       return;

   dest = radeonAllocCmdBuf( rmesa, size * 4, __FUNCTION__);
   texunits = rmesa->glCtx->Const.MaxTextureUnits;

#define EMIT_ATOM(ATOM) \
do { \
   if (rmesa->hw.ATOM.dirty) { \
      rmesa->hw.ATOM.dirty = GL_FALSE; \
      memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
      dest += rmesa->hw.ATOM.cmd_size * 4; \
   } \
} while (0)

   EMIT_ATOM (ctx);
   EMIT_ATOM (set);
   EMIT_ATOM (lin);
   EMIT_ATOM (msk);
   EMIT_ATOM (vpt);
   EMIT_ATOM (tcl);
   EMIT_ATOM (msc);
   for (i = 0; i < texunits; ++i) {
       EMIT_ATOM (tex[i]);
       EMIT_ATOM (txr[i]);
   }
   EMIT_ATOM (zbs);
   EMIT_ATOM (mtl);
   for (i = 0; i < 3 + texunits; ++i)
       EMIT_ATOM (mat[i]);
   for (i = 0; i < 8; ++i)
       EMIT_ATOM (lit[i]);
   for (i = 0; i < 6; ++i)
       EMIT_ATOM (ucp[i]);
   EMIT_ATOM (eye);
   EMIT_ATOM (grd);
   EMIT_ATOM (fog);
   EMIT_ATOM (glt);

#undef EMIT_ATOM
}
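
/* Illustration only: a state atom is a small dword array whose first
 * element is a pre-built command header, plus a check() predicate that
 * says whether it needs (re)emission.  Setup looks roughly like this
 * (hypothetical names, not the actual init code):
 *
 *    struct radeon_state_atom atom;
 *    atom.cmd_size = 3;                       // header + two registers
 *    atom.cmd[0] = cmdpkt(RADEON_EMIT_RE_MISC);
 *    atom.check = check_always;
 *
 * radeon_emit_state_list() memcpy()s every atom whose check() fires
 * into the command buffer, in the fixed order above.
 */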


void radeonEmitState( radeonContextPtr rmesa )
{
   struct radeon_state_atom *state, *tmp;

   if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
      fprintf(stderr, "%s\n", __FUNCTION__);

   /* Somewhat overkill:
    */
   if (rmesa->lost_context) {
      if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
	 fprintf(stderr, "%s - lost context\n", __FUNCTION__);

      foreach_s( state, tmp, &(rmesa->hw.clean) )
	 move_to_tail(&(rmesa->hw.dirty), state );

      rmesa->lost_context = 0;
   }
   else if (1) {
      /* This is a dastardly kludge to work around a lockup that I
       * haven't otherwise figured out.
       */
      move_to_tail(&(rmesa->hw.dirty), &(rmesa->hw.zbs) );
   }

   if (!(rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL)) {
     foreach_s( state, tmp, &(rmesa->hw.dirty) ) {
       if (state->is_tcl) {
	 move_to_head( &(rmesa->hw.clean), state );
       }
     }
   }

   radeon_emit_state_list( rmesa, &rmesa->hw.dirty );
}


/* Fire a section of the retained (indexed_verts) buffer as a regular
 * primitive.
 */
extern void radeonEmitVbufPrim( radeonContextPtr rmesa,
				GLuint vertex_format,
				GLuint primitive,
				GLuint vertex_nr )
{
   drm_radeon_cmd_header_t *cmd;


   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
	      rmesa->store.cmd_used/4);

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
						       __FUNCTION__ );
#if RADEON_OLD_PACKETS
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = vertex_nr;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
	      __FUNCTION__,
	      cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
#else
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
	       (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));


   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
	      __FUNCTION__,
	      cmd[1].i, cmd[2].i, cmd[3].i);
#endif
}
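
/* A caller would fire a vertex buffer along these lines (illustrative
 * only; the real call sites live in the tcl/swtcl code):
 *
 *    radeonEmitVbufPrim( rmesa,
 *                        RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_PKCOLOR,
 *                        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,
 *                        nverts );
 *
 * with the vertices themselves already placed in the current DMA
 * region and pointed at via radeonEmitVertexAOS()/radeonEmitAOS().
 */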


void radeonFlushElts( radeonContextPtr rmesa )
{
   int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start);
   int dwords;
#if RADEON_OLD_PACKETS
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2;
#else
   int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2;
#endif
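
   /* The byte offsets above (24 with the old packets, 16 without) skip
    * the 6- or 4-dword packet header written by radeonAllocEltsOpenEnded()
    * below; dividing the remaining bytes by 2 yields the number of
    * 16-bit element indices actually written.
    */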

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   assert( rmesa->dma.flush == radeonFlushElts );
   rmesa->dma.flush = 0;

   /* Cope with odd number of elts: round the (always even) byte count
    * up to a multiple of 4 so the packet ends on a dword boundary.
    */
   rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2;
   dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4;

#if RADEON_OLD_PACKETS
   cmd[1] |= (dwords - 3) << 16;
   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#else
   cmd[1] |= (dwords - 3) << 16;
   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
#endif
}


GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa,
				    GLuint vertex_format,
				    GLuint primitive,
				    GLuint min_nr )
{
   drm_radeon_cmd_header_t *cmd;
   GLushort *retval;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr);

   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));

   radeonEmitState( rmesa );

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
						       ELTS_BUFSZ(min_nr),
						       __FUNCTION__ );
#if RADEON_OLD_PACKETS
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
   cmd[2].i = rmesa->ioctl.vertex_offset;
   cmd[3].i = 0xffff;
   cmd[4].i = vertex_format;
   cmd[5].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+6);
#else
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
   cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
   cmd[2].i = vertex_format;
   cmd[3].i = (primitive |
	       RADEON_CP_VC_CNTL_PRIM_WALK_IND |
	       RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
	       RADEON_CP_VC_CNTL_MAOS_ENABLE |
	       RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);

   retval = (GLushort *)(cmd+4);
#endif

   if (RADEON_DEBUG & DEBUG_PRIMS)
      fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
	      __FUNCTION__,
	      cmd[1].i, vertex_format, primitive);

   assert(!rmesa->dma.flush);
   rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
   rmesa->dma.flush = radeonFlushElts;

   rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf;

   return retval;
}
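
/* Expected usage, sketched (illustrative -- the real callers are the
 * elt macros in the tcl code, which also account for store.cmd_used
 * when fewer than min_nr elts end up being written):
 *
 *    GLushort *dest = radeonAllocEltsOpenEnded( rmesa, fmt, prim, nr );
 *    for ( i = 0 ; i < nr ; i++ )
 *       dest[i] = elts[i];
 *
 * The deferred radeonFlushElts() then patches the real dword and
 * element counts back into the packet header.
 */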



void radeonEmitVertexAOS( radeonContextPtr rmesa,
			  GLuint vertex_size,
			  GLuint offset )
{
#if RADEON_OLD_PACKETS
   rmesa->ioctl.vertex_size = vertex_size;
   rmesa->ioctl.vertex_offset = offset;
#else
   drm_radeon_cmd_header_t *cmd;

   if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
	      __FUNCTION__, vertex_size, offset);

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
						  __FUNCTION__ );

   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
   cmd[2].i = 1;
   cmd[3].i = vertex_size | (vertex_size << 8);
   cmd[4].i = offset;
#endif
}


void radeonEmitAOS( radeonContextPtr rmesa,
		    struct radeon_dma_region **component,
		    GLuint nr,
		    GLuint offset )
{
#if RADEON_OLD_PACKETS
   assert( nr == 1 );
   assert( component[0]->aos_size == component[0]->aos_stride );
   rmesa->ioctl.vertex_size = component[0]->aos_size;
   rmesa->ioctl.vertex_offset =
      (component[0]->aos_start + offset * component[0]->aos_stride * 4);
#else
   drm_radeon_cmd_header_t *cmd;
   int sz = AOS_BUFSZ;
   int i;
   int *tmp;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);


   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz,
						  __FUNCTION__ );
   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16);
   cmd[2].i = nr;
   tmp = &cmd[0].i;
   cmd += 3;

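   /* Each group of three dwords after the header describes two arrays:
    * dword 0 packs size/stride for array 2i in its low half and for
    * array 2i+1 in its high half, and dwords 1 and 2 hold the two
    * buffer offsets.  An odd final array leaves the high half and the
    * last offset unused.
    */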
   for (i = 0 ; i < nr ; i++) {
      if (i & 1) {
	 cmd[0].i |= ((component[i]->aos_stride << 24) |
		      (component[i]->aos_size << 16));
	 cmd[2].i = (component[i]->aos_start +
		     offset * component[i]->aos_stride * 4);
	 cmd += 3;
      }
      else {
	 cmd[0].i = ((component[i]->aos_stride << 8) |
		     (component[i]->aos_size << 0));
	 cmd[1].i = (component[i]->aos_start +
		     offset * component[i]->aos_stride * 4);
      }
   }

   if (RADEON_DEBUG & DEBUG_VERTS) {
      fprintf(stderr, "%s:\n", __FUNCTION__);
      for (i = 0 ; i < sz ; i++)
	 fprintf(stderr, "   %d: %x\n", i, tmp[i]);
   }
#endif
}

/* using already shifted color_fmt! */
void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */
		   GLuint color_fmt,
		   GLuint src_pitch,
		   GLuint src_offset,
		   GLuint dst_pitch,
		   GLuint dst_offset,
		   GLint srcx, GLint srcy,
		   GLint dstx, GLint dsty,
		   GLuint w, GLuint h )
{
   drm_radeon_cmd_header_t *cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n",
	      __FUNCTION__,
	      src_pitch, src_offset, srcx, srcy,
	      dst_pitch, dst_offset, dstx, dsty,
	      w, h);

   assert( (src_pitch & 63) == 0 );
   assert( (dst_pitch & 63) == 0 );
   assert( (src_offset & 1023) == 0 );
   assert( (dst_offset & 1023) == 0 );
   assert( w < (1<<16) );
   assert( h < (1<<16) );

   cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int),
						  __FUNCTION__ );


   cmd[0].i = 0;
   cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
   cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
	       RADEON_GMC_BRUSH_NONE |
	       color_fmt |
	       RADEON_GMC_SRC_DATATYPE_COLOR |
	       RADEON_ROP3_S |
	       RADEON_DP_SRC_SOURCE_MEMORY |
	       RADEON_GMC_CLR_CMP_CNTL_DIS |
	       RADEON_GMC_WR_MSK_DIS );

   cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10);
   cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10);
   cmd[5].i = (srcx << 16) | srcy;
   cmd[6].i = (dstx << 16) | dsty; /* dst */
   cmd[7].i = (w << 16) | h;
}
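
/* Hypothetical call, for illustration only -- pitches must be 64-byte
 * aligned and offsets 1024-byte aligned, as the asserts above enforce:
 *
 *    radeonEmitBlit( rmesa, blit_format,
 *                    pitch, src_offset,
 *                    pitch, dst_offset,
 *                    0, 0, 0, 0, width, height );
 */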


void radeonEmitWait( radeonContextPtr rmesa, GLuint flags )
{
   if (rmesa->dri.drmMinor >= 6) {
      drm_radeon_cmd_header_t *cmd;

      assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) );

      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int),
						   __FUNCTION__ );
      cmd[0].i = 0;
      cmd[0].wait.cmd_type = RADEON_CMD_WAIT;
      cmd[0].wait.flags = flags;
   }
}


static int radeonFlushCmdBufLocked( radeonContextPtr rmesa,
				    const char * caller )
{
   int ret, i;
   drm_radeon_cmd_buffer_t cmd;

   if (RADEON_DEBUG & DEBUG_IOCTL) {
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

      if (RADEON_DEBUG & DEBUG_VERBOSE)
	 for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 )
	    fprintf(stderr, "%d: %x\n", i/4,
		    *(int *)(&rmesa->store.cmd_buf[i]));
   }

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__,
	      rmesa->dma.nr_released_bufs);


   if (RADEON_DEBUG & DEBUG_SANITY) {
      if (rmesa->state.scissor.enabled)
	 ret = radeonSanityCmdBuffer( rmesa,
				      rmesa->state.scissor.numClipRects,
				      rmesa->state.scissor.pClipRects);
      else
	 ret = radeonSanityCmdBuffer( rmesa,
				      rmesa->numClipRects,
				      rmesa->pClipRects);
      if (ret) {
	 fprintf(stderr, "drmSanityCommandWrite: %d\n", ret);
	 goto out;
      }
   }


   cmd.bufsz = rmesa->store.cmd_used;
   cmd.buf = rmesa->store.cmd_buf;

   if (rmesa->state.scissor.enabled) {
      cmd.nbox = rmesa->state.scissor.numClipRects;
      cmd.boxes = rmesa->state.scissor.pClipRects;
   } else {
      cmd.nbox = rmesa->numClipRects;
      cmd.boxes = rmesa->pClipRects;
   }

   ret = drmCommandWrite( rmesa->dri.fd,
			  DRM_RADEON_CMDBUF,
			  &cmd, sizeof(cmd) );

   if (ret)
      fprintf(stderr, "drmCommandWrite: %d\n", ret);

 out:
   rmesa->store.primnr = 0;
   rmesa->store.statenr = 0;
   rmesa->store.cmd_used = 0;
   rmesa->dma.nr_released_bufs = 0;
   return ret;
}


/* Note: does not emit any commands to avoid recursion on
 * radeonAllocCmdBuf.
 */
void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller )
{
   int ret;


   LOCK_HARDWARE( rmesa );

   ret = radeonFlushCmdBufLocked( rmesa, caller );

   UNLOCK_HARDWARE( rmesa );

   if (ret) {
      fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret);
      exit(ret);
   }
}

/* =============================================================
 * Hardware vertex buffer handling
 */


void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa )
{
   struct radeon_dma_buffer *dmabuf;
   int fd = rmesa->dri.fd;
   int index = 0;
   int size = 0;
   drmDMAReq dma;
   int ret;

   if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush) {
      rmesa->dma.flush( rmesa );
   }

   if (rmesa->dma.current.buf)
      radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ );

   if (rmesa->dma.nr_released_bufs > 4)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );

   dma.context = rmesa->dri.hwContext;
   dma.send_count = 0;
   dma.send_list = NULL;
   dma.send_sizes = NULL;
   dma.flags = 0;
   dma.request_count = 1;
   dma.request_size = RADEON_BUFFER_SIZE;
   dma.request_list = &index;
   dma.request_sizes = &size;
   dma.granted_count = 0;

   LOCK_HARDWARE(rmesa);	/* no need to validate */

   ret = drmDMA( fd, &dma );

   if (ret != 0) {
      /* Free some up this way?
       */
      if (rmesa->dma.nr_released_bufs) {
	 radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
      }

      if (RADEON_DEBUG & DEBUG_DMA)
	 fprintf(stderr, "Waiting for buffers\n");

      radeonWaitForIdleLocked( rmesa );
      ret = drmDMA( fd, &dma );

      if ( ret != 0 ) {
	 UNLOCK_HARDWARE( rmesa );
	 fprintf( stderr, "Error: Could not get dma buffer... exiting\n" );
	 exit( -1 );
      }
   }

   UNLOCK_HARDWARE(rmesa);

   if (RADEON_DEBUG & DEBUG_DMA)
      fprintf(stderr, "Allocated buffer %d\n", index);

   dmabuf = CALLOC_STRUCT( radeon_dma_buffer );
   dmabuf->buf = &rmesa->radeonScreen->buffers->list[index];
   dmabuf->refcount = 1;

   rmesa->dma.current.buf = dmabuf;
   rmesa->dma.current.address = dmabuf->buf->address;
   rmesa->dma.current.end = dmabuf->buf->total;
   rmesa->dma.current.start = 0;
   rmesa->dma.current.ptr = 0;

   rmesa->c_vertexBuffers++;
}

void radeonReleaseDmaRegion( radeonContextPtr rmesa,
			     struct radeon_dma_region *region,
			     const char *caller )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s from %s\n", __FUNCTION__, caller);

   if (!region->buf)
      return;

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (--region->buf->refcount == 0) {
      drm_radeon_cmd_header_t *cmd;

      if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
	 fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
		 region->buf->buf->idx);

      cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd),
						     __FUNCTION__ );
      cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD;
      cmd->dma.buf_idx = region->buf->buf->idx;
      FREE(region->buf);
      rmesa->dma.nr_released_bufs++;
   }

   region->buf = 0;
   region->start = 0;
}
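
/* Note that RADEON_CMD_DMA_DISCARD above only queues the release in
 * the command stream; the kernel actually recycles the buffer when the
 * command buffer is flushed.  nr_released_bufs counts these pending
 * discards so radeonRefillCurrentDmaRegion() knows when a flush is
 * worthwhile.
 */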

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion( radeonContextPtr rmesa,
			   struct radeon_dma_region *region,
			   int bytes,
			   int alignment )
{
   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (region->buf)
      radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ );

   alignment--;
   rmesa->dma.current.start = rmesa->dma.current.ptr =
      (rmesa->dma.current.ptr + alignment) & ~alignment;

   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
      radeonRefillCurrentDmaRegion( rmesa );

   region->start = rmesa->dma.current.start;
   region->ptr = rmesa->dma.current.start;
   region->end = rmesa->dma.current.start + bytes;
   region->address = rmesa->dma.current.address;
   region->buf = rmesa->dma.current.buf;
   region->buf->refcount++;

   rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
   rmesa->dma.current.start =
      rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
}

void radeonAllocDmaRegionVerts( radeonContextPtr rmesa,
				struct radeon_dma_region *region,
				int numverts,
				int vertsize,
				int alignment )
{
   radeonAllocDmaRegion( rmesa, region, vertsize * numverts, alignment );
}

/* ================================================================
 * SwapBuffers with client-side throttling
 */

static uint32_t radeonGetLastFrame (radeonContextPtr rmesa)
{
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   int ret;
   uint32_t frame;

   if (rmesa->dri.screen->drmMinor >= 4) {
      drm_radeon_getparam_t gp;

      gp.param = RADEON_PARAM_LAST_FRAME;
      gp.value = (int *)&frame;
      ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM,
				 &gp, sizeof(gp) );
   }
   else
      ret = -EINVAL;

   if ( ret == -EINVAL ) {
      frame = INREG( RADEON_LAST_FRAME_REG );
      ret = 0;
   }
   if ( ret ) {
      fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
      exit(1);
   }

   return frame;
}

static void radeonEmitIrqLocked( radeonContextPtr rmesa )
{
   drm_radeon_irq_emit_t ie;
   int ret;

   ie.irq_seq = &rmesa->iw.irq_seq;
   ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT,
			      &ie, sizeof(ie) );
   if ( ret ) {
      fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}


static void radeonWaitIrq( radeonContextPtr rmesa )
{
   int ret;

   do {
      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT,
			     &rmesa->iw, sizeof(rmesa->iw) );
   } while (ret && (errno == EINTR || errno == EAGAIN));

   if ( ret ) {
      fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret );
      exit(1);
   }
}

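/* Throttling strategy: with IRQs available, emit an IRQ after each
 * swap and sleep in the kernel until it fires; the irqsEmitted counter
 * (re-armed to 10 below) covers frames that were queued before IRQ
 * emission started.  Without IRQs, busy-poll the last-frame scratch
 * value, optionally usleep()ing between polls.
 */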
static void radeonWaitForFrameCompletion( radeonContextPtr rmesa )
{
   drm_radeon_sarea_t *sarea = rmesa->sarea;

   if (rmesa->do_irqs) {
      if (radeonGetLastFrame(rmesa) < sarea->last_frame) {
	 if (!rmesa->irqsEmitted) {
	    while (radeonGetLastFrame (rmesa) < sarea->last_frame)
	       ;
	 }
	 else {
	    UNLOCK_HARDWARE( rmesa );
	    radeonWaitIrq( rmesa );
	    LOCK_HARDWARE( rmesa );
	 }
	 rmesa->irqsEmitted = 10;
      }

      if (rmesa->irqsEmitted) {
	 radeonEmitIrqLocked( rmesa );
	 rmesa->irqsEmitted--;
      }
   }
   else {
      while (radeonGetLastFrame (rmesa) < sarea->last_frame) {
	 UNLOCK_HARDWARE( rmesa );
	 if (rmesa->do_usleeps)
	    DO_USLEEP( 1 );
	 LOCK_HARDWARE( rmesa );
      }
   }
}

/* Copy the back color buffer to the front color buffer.
 */
void radeonCopyBuffer( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint nbox, i, ret;
   GLboolean   missed_target;
   int64_t ust;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Throttle the frame rate -- only allow one pending swap buffers
    * request at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   LOCK_HARDWARE( rmesa );

   nbox = dPriv->numClipRects; /* must be in locked region */

   for ( i = 0 ; i < nbox ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      GLint n = 0;

      for ( ; i < nr ; i++ ) {
	 *b++ = box[i];
	 n++;
      }
      rmesa->sarea->nbox = n;

      ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );

      if ( ret ) {
	 fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
	 UNLOCK_HARDWARE( rmesa );
	 exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
   rmesa->swap_count++;
   (*rmesa->get_ust)( & ust );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      rmesa->swap_missed_ust = ust - rmesa->swap_ust;
   }

   rmesa->swap_ust = ust;
}

void radeonPageFlip( const __DRIdrawablePrivate *dPriv )
{
   radeonContextPtr rmesa;
   GLint ret;
   GLboolean   missed_target;

   assert(dPriv);
   assert(dPriv->driContextPriv);
   assert(dPriv->driContextPriv->driverPrivate);

   rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__,
	      rmesa->sarea->pfCurrentPage);
   }

   RADEON_FIREVERTICES( rmesa );
   LOCK_HARDWARE( rmesa );

   /* Need to do this for the perf box placement:
    */
   if (dPriv->numClipRects)
   {
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      b[0] = box[0];
      rmesa->sarea->nbox = 1;
   }
   /* Throttle the frame rate -- only allow a few pending swap-buffers
    * requests at a time.
    */
   radeonWaitForFrameCompletion( rmesa );
   UNLOCK_HARDWARE( rmesa );
   driWaitForVBlank( dPriv, & rmesa->vbl_seq, rmesa->vblank_flags, & missed_target );
   if ( missed_target ) {
      rmesa->swap_missed_count++;
      (void) (*rmesa->get_ust)( & rmesa->swap_missed_ust );
   }
   LOCK_HARDWARE( rmesa );

   ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP );

   UNLOCK_HARDWARE( rmesa );

   if ( ret ) {
      fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
      exit( 1 );
   }

   rmesa->swap_count++;
   (void) (*rmesa->get_ust)( & rmesa->swap_ust );

   if ( rmesa->sarea->pfCurrentPage == 1 ) {
	 rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
   } else {
	 rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
	 rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
   }

   RADEON_STATECHANGE( rmesa, ctx );
   rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset
					   + rmesa->radeonScreen->fbLocation;
   rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH]  = rmesa->state.color.drawPitch;
}


/* ================================================================
 * Buffer clear
 */
#define RADEON_MAX_CLEARS	256

static void radeonClear( GLcontext *ctx, GLbitfield mask, GLboolean all,
			 GLint cx, GLint cy, GLint cw, GLint ch )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
   drm_radeon_sarea_t *sarea = rmesa->sarea;
   unsigned char *RADEONMMIO = rmesa->radeonScreen->mmio.map;
   uint32_t clear;
   GLuint flags = 0;
   GLuint color_mask = 0;
   GLint ret, i;

   if ( RADEON_DEBUG & DEBUG_IOCTL ) {
      fprintf( stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
	       __FUNCTION__, all, cx, cy, cw, ch );
   }

   /* Need to cope with lost context here as the kernel relies on
    * some residual state:
    */
   RADEON_FIREVERTICES( rmesa );

   if ( mask & DD_FRONT_LEFT_BIT ) {
      flags |= RADEON_FRONT;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_FRONT_LEFT_BIT;
   }

   if ( mask & DD_BACK_LEFT_BIT ) {
      flags |= RADEON_BACK;
      color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      mask &= ~DD_BACK_LEFT_BIT;
   }

   if ( mask & DD_DEPTH_BIT ) {
      if ( ctx->Depth.Mask ) flags |= RADEON_DEPTH; /* FIXME: ??? */
      mask &= ~DD_DEPTH_BIT;
   }

   if ( (mask & DD_STENCIL_BIT) && rmesa->state.stencil.hwBuffer ) {
      flags |= RADEON_STENCIL;
      mask &= ~DD_STENCIL_BIT;
   }

   if ( mask ) {
      if (RADEON_DEBUG & DEBUG_FALLBACKS)
	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
      _swrast_Clear( ctx, mask, all, cx, cy, cw, ch );
   }

   if ( !flags )
      return;


   /* Flip top to bottom */
   cx += dPriv->x;
   cy  = dPriv->y + dPriv->h - cy - ch;

   LOCK_HARDWARE( rmesa );

   /* Throttle the number of clear ioctls we do.
    */
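   /* The kernel bumps sarea->last_clear for every clear it queues, and
    * RADEON_PARAM_LAST_CLEAR (or the scratch register) reports the last
    * one retired; spin until fewer than RADEON_MAX_CLEARS clears remain
    * outstanding.
    */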
   while ( 1 ) {
      int ret;

      if (rmesa->dri.screen->drmMinor >= 4) {
	drm_radeon_getparam_t gp;

	gp.param = RADEON_PARAM_LAST_CLEAR;
	gp.value = (int *)&clear;
	ret = drmCommandWriteRead( rmesa->dri.fd,
				   DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
      } else
	ret = -EINVAL;

      if ( ret == -EINVAL ) {
	 clear = INREG( RADEON_LAST_CLEAR_REG );
	 ret = 0;
      }
      if ( ret ) {
	 fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
	 exit(1);
      }
      if ( RADEON_DEBUG & DEBUG_IOCTL ) {
	 fprintf( stderr, "%s( %d )\n", __FUNCTION__, (int)clear );
	 if ( ret ) fprintf( stderr, " ( RADEON_LAST_CLEAR register read directly )\n" );
      }

      if ( sarea->last_clear - clear <= RADEON_MAX_CLEARS ) {
	 break;
      }

      if ( rmesa->do_usleeps ) {
	 UNLOCK_HARDWARE( rmesa );
	 DO_USLEEP( 1 );
	 LOCK_HARDWARE( rmesa );
      }
   }

   for ( i = 0 ; i < dPriv->numClipRects ; ) {
      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
      drm_clip_rect_t *box = dPriv->pClipRects;
      drm_clip_rect_t *b = rmesa->sarea->boxes;
      drm_radeon_clear_t clear;
      drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
      GLint n = 0;

      if ( !all ) {
	 for ( ; i < nr ; i++ ) {
	    GLint x = box[i].x1;
	    GLint y = box[i].y1;
	    GLint w = box[i].x2 - x;
	    GLint h = box[i].y2 - y;

	    if ( x < cx ) w -= cx - x, x = cx;
	    if ( y < cy ) h -= cy - y, y = cy;
	    if ( x + w > cx + cw ) w = cx + cw - x;
	    if ( y + h > cy + ch ) h = cy + ch - y;
	    if ( w <= 0 ) continue;
	    if ( h <= 0 ) continue;

	    b->x1 = x;
	    b->y1 = y;
	    b->x2 = x + w;
	    b->y2 = y + h;
	    b++;
	    n++;
	 }
      } else {
	 for ( ; i < nr ; i++ ) {
	    *b++ = box[i];
	    n++;
	 }
      }

      rmesa->sarea->nbox = n;

      clear.flags       = flags;
      clear.clear_color = rmesa->state.color.clear;
      clear.clear_depth = rmesa->state.depth.clear;
      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
      clear.depth_mask  = rmesa->state.stencil.clear;
      clear.depth_boxes = depth_boxes;

      n--;
      b = rmesa->sarea->boxes;
      for ( ; n >= 0 ; n-- ) {
	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
	 depth_boxes[n].f[CLEAR_DEPTH] =
	    (float)rmesa->state.depth.clear;
      }

      ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR,
			     &clear, sizeof(drm_radeon_clear_t));

      if ( ret ) {
	 UNLOCK_HARDWARE( rmesa );
	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
	 exit( 1 );
      }
   }

   UNLOCK_HARDWARE( rmesa );
}


void radeonWaitForIdleLocked( radeonContextPtr rmesa )
{
    int fd = rmesa->dri.fd;
    int to = 0;
    int ret, i = 0;

    rmesa->c_drawWaits++;

    do {
        do {
            ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE);
        } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY );
    } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) );

    if ( ret < 0 ) {
	UNLOCK_HARDWARE( rmesa );
	fprintf( stderr, "Error: Radeon timed out... exiting\n" );
	exit( -1 );
    }
}


static void radeonWaitForIdle( radeonContextPtr rmesa )
{
   LOCK_HARDWARE(rmesa);
   radeonWaitForIdleLocked( rmesa );
   UNLOCK_HARDWARE(rmesa);
}


void radeonFlush( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );

   if (RADEON_DEBUG & DEBUG_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (rmesa->dma.flush)
      rmesa->dma.flush( rmesa );

   if (!is_empty_list(&rmesa->hw.dirty))
      radeonEmitState( rmesa );

   if (rmesa->store.cmd_used)
      radeonFlushCmdBuf( rmesa, __FUNCTION__ );
}

/* Make sure all commands have been sent to the hardware and have
 * completed processing.
 */
void radeonFinish( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   radeonFlush( ctx );

   if (rmesa->do_irqs) {
      LOCK_HARDWARE( rmesa );
      radeonEmitIrqLocked( rmesa );
      UNLOCK_HARDWARE( rmesa );
      radeonWaitIrq( rmesa );
   }
   else
      radeonWaitForIdle( rmesa );
}


void radeonInitIoctlFuncs( GLcontext *ctx )
{
    ctx->Driver.Clear = radeonClear;
    ctx->Driver.Finish = radeonFinish;
    ctx->Driver.Flush = radeonFlush;
}
