radeon_swtcl.c revision b79c47e90ce421426e4608c85461148f3962f9a0
1/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c,v 1.6 2003/05/06 23:52:08 daenzer Exp $ */
2/**************************************************************************
3
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7All Rights Reserved.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice (including the
18next paragraph) shall be included in all copies or substantial
19portions of the Software.
20
21THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29**************************************************************************/
30
31/*
32 * Authors:
33 *   Keith Whitwell <keith@tungstengraphics.com>
34 */
35
36#include "glheader.h"
37#include "mtypes.h"
38#include "colormac.h"
39#include "enums.h"
40#include "imports.h"
41#include "macros.h"
42
43#include "swrast_setup/swrast_setup.h"
44#include "math/m_translate.h"
45#include "tnl/tnl.h"
46#include "tnl/t_context.h"
47#include "tnl/t_pipeline.h"
48#include "tnl/t_vtx_api.h"	/* for _tnl_FlushVertices */
49
50#include "radeon_context.h"
51#include "radeon_ioctl.h"
52#include "radeon_state.h"
53#include "radeon_swtcl.h"
54#include "radeon_tcl.h"
55
56
/* Forward declaration: radeonRenderStart() below compares the pending
 * DMA flush callback against this function before it is defined.
 */
static void flush_last_swtcl_prim( radeonContextPtr rmesa  );

/* Largest vertex the software TNL path may build, counted in GLfloat-sized
 * words and converted to bytes by the macro below:
 */
/* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15 */
/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
62
63/***********************************************************************
64 *                         Initialization
65 ***********************************************************************/
66
/* Append one attribute to rmesa->swtcl.vertex_attrs and OR its hardware
 * format bits (F0) into the caller's local `fmt_0`.
 *
 * Both `rmesa` and `fmt_0` must be in scope at the expansion site.
 */
#define EMIT_ATTR( ATTR, STYLE, F0 )					\
do {									\
   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR);	\
   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE);	\
   rmesa->swtcl.vertex_attr_count++;					\
   fmt_0 |= F0;								\
} while (0)

/* Append a padding entry (format EMIT_PAD, offset N) to
 * rmesa->swtcl.vertex_attrs.  No hardware format bits are added.
 *
 * NOTE(review): N is presumably a byte count (callers pass 3 and 1 to fill
 * the unused part of the packed specular/fog dword) -- confirm against
 * t_vertex.c.  The EMIT_PAD token inside the body is not followed by '(',
 * so it is not expanded recursively and names the format constant.
 */
#define EMIT_PAD( N )							\
do {									\
   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0;		\
   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD;	\
   rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N);		\
   rmesa->swtcl.vertex_attr_count++;					\
} while (0)
82
83static GLuint radeon_cp_vc_frmts[3][2] =
84{
85   { RADEON_CP_VC_FRMT_ST0, RADEON_CP_VC_FRMT_ST0 | RADEON_CP_VC_FRMT_Q0 },
86   { RADEON_CP_VC_FRMT_ST1, RADEON_CP_VC_FRMT_ST1 | RADEON_CP_VC_FRMT_Q1 },
87   { RADEON_CP_VC_FRMT_ST2, RADEON_CP_VC_FRMT_ST2 | RADEON_CP_VC_FRMT_Q2 },
88};
89
90static void radeonSetVertexFormat( GLcontext *ctx )
91{
92   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
93   TNLcontext *tnl = TNL_CONTEXT(ctx);
94   struct vertex_buffer *VB = &tnl->vb;
95   GLuint index = tnl->render_inputs;
96   int fmt_0 = 0;
97   int offset = 0;
98
99
100   /* Important:
101    */
102   if ( VB->NdcPtr != NULL ) {
103      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
104   }
105   else {
106      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
107   }
108
109   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
110   rmesa->swtcl.vertex_attr_count = 0;
111
112   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
113    * build up a hardware vertex.
114    */
115   if ( !rmesa->swtcl.needproj ||
116        (index & _TNL_BITS_TEX_ANY)) {	/* for projtex */
117      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F,
118		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z | RADEON_CP_VC_FRMT_W0 );
119      offset = 4;
120   }
121   else {
122      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F,
123		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z );
124      offset = 3;
125   }
126
127   rmesa->swtcl.coloroffset = offset;
128#if MESA_LITTLE_ENDIAN
129   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA,
130	      RADEON_CP_VC_FRMT_PKCOLOR );
131#else
132   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR,
133	      RADEON_CP_VC_FRMT_PKCOLOR );
134#endif
135   offset += 1;
136
137   rmesa->swtcl.specoffset = 0;
138   if (index & (_TNL_BIT_COLOR1|_TNL_BIT_FOG)) {
139
140#if MESA_LITTLE_ENDIAN
141      if (index & _TNL_BIT_COLOR1) {
142	 rmesa->swtcl.specoffset = offset;
143	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB,
144	 	    RADEON_CP_VC_FRMT_PKSPEC );
145      }
146      else {
147	 EMIT_PAD( 3 );
148      }
149
150      if (index & _TNL_BIT_FOG) {
151	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
152	 	    RADEON_CP_VC_FRMT_PKSPEC );
153      }
154      else {
155	 EMIT_PAD( 1 );
156      }
157#else
158      if (index & _TNL_BIT_FOG) {
159	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
160	 	    RADEON_CP_VC_FRMT_PKSPEC );
161      }
162      else {
163	 EMIT_PAD( 1 );
164      }
165
166      if (index & _TNL_BIT_COLOR1) {
167	 rmesa->swtcl.specoffset = offset;
168	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR,
169	 	    RADEON_CP_VC_FRMT_PKSPEC );
170      }
171      else {
172	 EMIT_PAD( 3 );
173      }
174#endif
175   }
176
177   if (index & _TNL_BITS_TEX_ANY) {
178      int i;
179
180      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
181	 if (index & _TNL_BIT_TEX(i)) {
182	    GLuint sz = VB->TexCoordPtr[i]->size;
183
184	    switch (sz) {
185	    case 1:
186	    case 2:
187	    case 3:
188	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F,
189			  radeon_cp_vc_frmts[i][0] );
190	       break;
191	    case 4:
192	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F_XYW,
193			  radeon_cp_vc_frmts[i][1] );
194	       break;
195	    default:
196	       continue;
197	    };
198	 }
199      }
200   }
201
202   if ( rmesa->tnl_index != index ||
203	fmt_0 != rmesa->swtcl.vertex_format) {
204      RADEON_NEWPRIM(rmesa);
205      rmesa->swtcl.vertex_format = fmt_0;
206      rmesa->swtcl.vertex_size =
207	  _tnl_install_attrs( ctx,
208			      rmesa->swtcl.vertex_attrs,
209			      rmesa->swtcl.vertex_attr_count,
210			      NULL, 0 );
211      rmesa->swtcl.vertex_size /= 4;
212      rmesa->tnl_index = index;
213      if (RADEON_DEBUG & DEBUG_VERTS)
214	 fprintf( stderr, "%s: vertex_size= %d floats\n",
215		  __FUNCTION__, rmesa->swtcl.vertex_size);
216   }
217}
218
219
220static void radeonRenderStart( GLcontext *ctx )
221{
222   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
223
224   radeonSetVertexFormat( ctx );
225
226   if (rmesa->dma.flush != 0 &&
227       rmesa->dma.flush != flush_last_swtcl_prim)
228      rmesa->dma.flush( rmesa );
229}
230
231
232/**
233 * Set vertex state for SW TCL.  The primary purpose of this function is to
234 * determine in advance whether or not the hardware can / should do the
235 * projection divide or Mesa should do it.
236 */
237void radeonChooseVertexState( GLcontext *ctx )
238{
239   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
240   TNLcontext *tnl = TNL_CONTEXT(ctx);
241
242   GLuint se_coord_fmt;
243
244   /* HW perspective divide is a win, but tiny vertex formats are a
245    * bigger one.
246    */
247
248   if ( ((tnl->render_inputs & (_TNL_BITS_TEX_ANY|_TNL_BIT_COLOR1) ) == 0)
249	|| (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
250      rmesa->swtcl.needproj = GL_TRUE;
251      se_coord_fmt = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
252		      RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
253		      RADEON_TEX1_W_ROUTING_USE_Q1);
254   }
255   else {
256      rmesa->swtcl.needproj = GL_FALSE;
257      se_coord_fmt = (RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
258		      RADEON_TEX1_W_ROUTING_USE_Q1);
259   }
260
261   _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
262
263   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
264      RADEON_STATECHANGE( rmesa, set );
265      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
266   }
267}
268
269
270/* Flush vertices in the current dma region.
271 */
272static void flush_last_swtcl_prim( radeonContextPtr rmesa  )
273{
274   if (RADEON_DEBUG & DEBUG_IOCTL)
275      fprintf(stderr, "%s\n", __FUNCTION__);
276
277   rmesa->dma.flush = NULL;
278
279   if (rmesa->dma.current.buf) {
280      struct radeon_dma_region *current = &rmesa->dma.current;
281      GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset +
282			       current->buf->buf->idx * RADEON_BUFFER_SIZE +
283			       current->start);
284
285      assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
286
287      assert (current->start +
288	      rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
289	      current->ptr);
290
291      if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
292	 radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
293			          rmesa->hw.max_state_size + VBUF_BUFSZ );
294
295	 radeonEmitVertexAOS( rmesa,
296			      rmesa->swtcl.vertex_size,
297			      current_offset);
298
299	 radeonEmitVbufPrim( rmesa,
300			     rmesa->swtcl.vertex_format,
301			     rmesa->swtcl.hw_primitive,
302			     rmesa->swtcl.numverts);
303      }
304
305      rmesa->swtcl.numverts = 0;
306      current->start = current->ptr;
307   }
308}
309
310
311/* Alloc space in the current dma region.
312 */
313static __inline void *radeonAllocDmaLowVerts( radeonContextPtr rmesa,
314					      int nverts, int vsize )
315{
316   GLuint bytes = vsize * nverts;
317
318   if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
319      radeonRefillCurrentDmaRegion( rmesa );
320
321   if (!rmesa->dma.flush) {
322      rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
323      rmesa->dma.flush = flush_last_swtcl_prim;
324   }
325
326   assert( vsize == rmesa->swtcl.vertex_size * 4 );
327   assert( rmesa->dma.flush == flush_last_swtcl_prim );
328   assert (rmesa->dma.current.start +
329	   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
330	   rmesa->dma.current.ptr);
331
332
333   {
334      GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
335      rmesa->dma.current.ptr += bytes;
336      rmesa->swtcl.numverts += nverts;
337      return head;
338   }
339
340}
341
342
/*
 * Render unclipped vertex buffers by emitting vertices directly to
 * dma buffers.  Use strip/fan hardware primitives where possible.
 * Try to simulate missing primitives with indexed vertices.
 *
 * The HAVE_* flags below configure the tnl_dd/t_dd_dmatmp.h template
 * included further down; 1 marks a primitive type as available to it.
 * NOTE(review): exact meaning of each flag is defined by the template,
 * which is not visible here -- confirm there.
 */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS    1
#define HAVE_QUADS       0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS    0
/* \todo: is it possible to make "ELTS" work with t_vertex code ? */
#define HAVE_ELTS        0

/* GL primitive mode -> hardware primitive type used by the DMA fast path.
 * Indexed by the GL enum value (GL_POINTS = 0 ... GL_POLYGON = 9); a zero
 * entry means there is no direct hardware primitive in this table.
 */
static const GLuint hw_prim[GL_POLYGON+1] = {
   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
   0,						/* GL_LINE_LOOP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,	/* GL_LINE_STRIP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,	/* GL_TRIANGLE_STRIP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,		/* GL_TRIANGLE_FAN */
   0,						/* GL_QUADS */
   0,						/* GL_QUAD_STRIP */
   0						/* GL_POLYGON */
};
373
374static __inline void radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim )
375{
376   RADEON_NEWPRIM( rmesa );
377   rmesa->swtcl.hw_primitive = hw_prim[prim];
378   assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
379}
380
/* Hooks consumed by tnl_dd/t_dd_dmatmp.h.  With TAG() as defined here the
 * template's symbols carry a radeon_dma_ prefix (e.g. the
 * radeon_dma_validate_render and render_tab_verts names used by
 * radeon_run_render below).
 */
#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx); (void)rmesa
#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
#define FLUSH()  RADEON_NEWPRIM( rmesa )
/* Whole vertices that still fit between the write pointer and the end of
 * the current dma region (vertex_size is in dwords, hence the *4).
 */
#define GET_CURRENT_VB_MAX_VERTS() \
  (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
/* Whole vertices that fit in one RADEON_BUFFER_SIZE buffer. */
#define GET_SUBSEQUENT_VB_MAX_VERTS() \
  ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
#define ALLOC_VERTS( nr ) \
  radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
/* Emits vertices j .. j+nr-1 into buf (the end index passed on is exclusive
 * by construction: (j)+(nr)).
 */
#define EMIT_VERTS( ctx, j, nr, buf ) \
  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)

#define TAG(x) radeon_dma_##x
#include "tnl_dd/t_dd_dmatmp.h"
395
396
397/**********************************************************************/
398/*                          Render pipeline stage                     */
399/**********************************************************************/
400
401
402static GLboolean radeon_run_render( GLcontext *ctx,
403				    struct tnl_pipeline_stage *stage )
404{
405   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
406   TNLcontext *tnl = TNL_CONTEXT(ctx);
407   struct vertex_buffer *VB = &tnl->vb;
408   tnl_render_func *tab = TAG(render_tab_verts);
409   GLuint i;
410
411   if (rmesa->swtcl.indexed_verts.buf)
412      RELEASE_ELT_VERTS();
413
414   if (rmesa->swtcl.RenderIndex != 0 ||
415       !radeon_dma_validate_render( ctx, VB ))
416      return GL_TRUE;
417
418   tnl->Driver.Render.Start( ctx );
419
420   for (i = 0 ; i < VB->PrimitiveCount ; i++)
421   {
422      GLuint prim = VB->Primitive[i].mode;
423      GLuint start = VB->Primitive[i].start;
424      GLuint length = VB->Primitive[i].count;
425
426      if (!length)
427	 continue;
428
429      if (RADEON_DEBUG & DEBUG_PRIMS)
430	 fprintf(stderr, "radeon_render.c: prim %s %d..%d\n",
431		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
432		 start, start+length);
433
434      if (length)
435	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
436   }
437
438   tnl->Driver.Render.Finish( ctx );
439
440   return GL_FALSE;		/* finished the pipe */
441}
442
443
444
445
/* Pipeline stage descriptor for the DMA fast-path renderer.
 * NOTE(review): members are given positionally; their names come from
 * struct tnl_pipeline_stage in tnl/t_context.h -- confirm the order there.
 * Only the name and the run callback are supplied.
 */
const struct tnl_pipeline_stage _radeon_render_stage =
{
   "radeon render",		/* name */
   NULL,
   NULL,
   NULL,
   NULL,
   radeon_run_render		/* run */
};
455
456
457/**************************************************************************/
458
/* Radeon texture rectangle expects coords in 0..1 range, not 0..dimension
 * as in the extension spec.  Need to translate here.
 *
 * Note that swrast expects 0..dimension, so if a fallback is active,
 * don't do anything.  (Maybe need to configure swrast to match hw)
 */
struct texrect_stage_data {
   /* Per-unit storage for the rescaled texture coordinates. */
   GLvector4f texcoord[MAX_TEXTURE_UNITS];
};

/* Fetch the stage's private data as a texrect_stage_data pointer. */
#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr)
470
471
472static GLboolean run_texrect_stage( GLcontext *ctx,
473				    struct tnl_pipeline_stage *stage )
474{
475   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
476   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
477   TNLcontext *tnl = TNL_CONTEXT(ctx);
478   struct vertex_buffer *VB = &tnl->vb;
479   GLuint i;
480
481   if (rmesa->Fallback)
482      return GL_TRUE;
483
484   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) {
485      if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) {
486	 struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect;
487	 struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel];
488	 const GLfloat iw = 1.0/texImage->Width;
489	 const GLfloat ih = 1.0/texImage->Height;
490	 GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data;
491	 GLint instride = VB->TexCoordPtr[i]->stride;
492	 GLfloat (*out)[4] = store->texcoord[i].data;
493	 GLint j;
494
495	 for (j = 0 ; j < VB->Count ; j++) {
496	    out[j][0] = in[0] * iw;
497	    out[j][1] = in[1] * ih;
498	    in = (GLfloat *)((GLubyte *)in + instride);
499	 }
500
501	 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i];
502      }
503   }
504
505   return GL_TRUE;
506}
507
508
509/* Called the first time stage->run() is invoked.
510 */
511static GLboolean alloc_texrect_data( GLcontext *ctx,
512				     struct tnl_pipeline_stage *stage )
513{
514   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
515   struct texrect_stage_data *store;
516   GLuint i;
517
518   stage->privatePtr = CALLOC(sizeof(*store));
519   store = TEXRECT_STAGE_DATA(stage);
520   if (!store)
521      return GL_FALSE;
522
523   for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++)
524      _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 );
525
526   return GL_TRUE;
527}
528
529static void free_texrect_data( struct tnl_pipeline_stage *stage )
530{
531   struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage);
532   GLuint i;
533
534   if (store) {
535      for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++)
536	 if (store->texcoord[i].data)
537	    _mesa_vector4f_free( &store->texcoord[i] );
538      FREE( store );
539      stage->privatePtr = NULL;
540   }
541}
542
/* Pipeline stage descriptor for the texture-rectangle coordinate rescale.
 * NOTE(review): members are given positionally; their names come from
 * struct tnl_pipeline_stage in tnl/t_context.h -- confirm the order there.
 */
const struct tnl_pipeline_stage _radeon_texrect_stage =
{
   "radeon texrect stage",			/* name */
   NULL,
   alloc_texrect_data,				/* allocates the private data */
   free_texrect_data,				/* releases the private data */
   NULL,
   run_texrect_stage				/* per-buffer work */
};
552
553
554/**************************************************************************/
555
556
/* GL primitive mode -> the basic hardware primitive (point, line or
 * triangle list) it is rendered as on the per-primitive path.  Indexed by
 * the GL enum value (GL_POINTS = 0 ... GL_POLYGON = 9).
 */
static const GLuint reduced_hw_prim[GL_POLYGON+1] = {
   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_STRIP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_FAN */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUADS */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUAD_STRIP */
   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST		/* GL_POLYGON */
};

/* Defined near the end of the file; declared here because the template
 * macros below expand to calls to them.
 */
static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim );
static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim );
static void radeonResetLineStipple( GLcontext *ctx );
573
574
575/***********************************************************************
576 *                    Emit primitives as inline vertices               *
577 ***********************************************************************/
578
/* Hooks consumed by tnl_dd/t_dd_triemit.h.  With TAG() as defined here the
 * template's symbols carry a radeon_ prefix; the QUAD/TRI/LINE/POINT macros
 * further down call radeon_quad, radeon_triangle, radeon_line and
 * radeon_point by those names.
 */
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG radeonContextPtr rmesa
/* Vertex size in dwords. */
#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
/* `size` is in dwords; radeonAllocDmaLowVerts wants bytes. */
#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
#undef LOCAL_VARS	/* already undefined above; harmless */
#define LOCAL_VARS						\
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
   const char *radeonverts = (char *)rmesa->swtcl.verts;
/* Address of vertex x.  Relies on `radeonverts` and `vertsize` (in dwords)
 * being in scope at the expansion site.
 */
#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
#define VERTEX radeonVertex
#undef TAG
#define TAG(x) radeon_##x
#include "tnl_dd/t_dd_triemit.h"


/***********************************************************************
 *          Macros for t_dd_tritmp.h to draw basic primitives          *
 ***********************************************************************/

#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )
#define TRI( a, b, c )     radeon_triangle( rmesa, a, b, c )
#define LINE( a, b )       radeon_line( rmesa, a, b )
#define POINT( a )         radeon_point( rmesa, a )
603
604/***********************************************************************
605 *              Build render functions from dd templates               *
606 ***********************************************************************/
607
/* Bits combined into the render index (see radeonChooseRenderState). */
#define RADEON_TWOSIDE_BIT	0x01
#define RADEON_UNFILLED_BIT	0x02
/* Number of slots in rast_tab.  The two bits above only produce indices
 * 0..3.
 */
#define RADEON_MAX_TRIFUNC	0x08


/* Per-render-index point/line/triangle/quad functions, indexed by the
 * RADEON_*_BIT combination.  Filled in by init_rast_tab().
 */
static struct {
   tnl_points_func	        points;
   tnl_line_func		line;
   tnl_triangle_func	triangle;
   tnl_quad_func		quad;
} rast_tab[RADEON_MAX_TRIFUNC];
619
620
/* Configuration for the tnl_dd/t_dd_tritmp.h and t_dd_unfilled.h templates
 * included below.  IND is defined before each inclusion and selects the
 * two-sided / unfilled variants.
 * NOTE(review): the precise meaning of each DO_ and HAVE_ flag is defined
 * by the templates, which are not visible here.
 */
#define DO_FALLBACK  0
#define DO_OFFSET    0
#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
#define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
#define DO_FLAT      0
#define DO_TRI       1
#define DO_QUAD      1
#define DO_LINE      1
#define DO_POINTS    1
#define DO_FULL_QUAD 1

#define HAVE_RGBA   1
#define HAVE_SPEC   1
#define HAVE_BACK_COLORS  0
#define HAVE_HW_FLATSHADE 1
#define TAB rast_tab

#define DEPTH_SCALE 1.0
#define UNFILLED_TRI unfilled_tri
#define UNFILLED_QUAD unfilled_quad
/* Position accessors on a radeonVertex pointer. */
#define VERT_X(_v) _v->v.x
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
/* Address of vertex e in the swtcl vertex store (vertex_size is in dwords,
 * verts is a byte pointer).
 */
#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))

/* Store float colour c[0..3] into the vertex's packed colour dword.
 * Requires `coloroffset` in scope (see LOCAL_VARS(n) below).
 */
#define VERT_SET_RGBA( v, c )  					\
do {								\
   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
} while (0)

/* Copy the whole packed colour dword from v1 to v0. */
#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]

/* Store float specular c[0..2] into the vertex's packed specular dword;
 * the fourth byte of that dword is left untouched.  A zero `specoffset`
 * means the vertex has no specular slot and nothing is written.
 */
#define VERT_SET_SPEC( v, c )					\
do {								\
   if (specoffset) {						\
      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
   }								\
} while (0)
/* Copy only the red/green/blue bytes of the specular dword from v1 to v0. */
#define VERT_COPY_SPEC( v0, v1 )			\
do {							\
   if (specoffset) {					\
      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
      spec0->red   = spec1->red;	\
      spec0->green = spec1->green;	\
      spec0->blue  = spec1->blue; 	\
   }							\
} while (0)

/* These don't need LE32_TO_CPU() as they are used to save and restore
 * colors which are already in the correct format.
 */
#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]

#undef LOCAL_VARS
#undef TAG
#undef INIT

/* Locals the colour/specular macros above rely on; n is the number of
 * vertices whose colours may be saved.
 */
#define LOCAL_VARS(n)							\
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
   GLuint color[n], spec[n];						\
   GLuint coloroffset = rmesa->swtcl.coloroffset;	\
   GLuint specoffset = rmesa->swtcl.specoffset;			\
   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
696
697/***********************************************************************
698 *                Helpers for rendering unfilled primitives            *
699 ***********************************************************************/
700
/* RASTERIZE(x) switches the hardware to the reduced primitive for GL mode
 * x; RENDER_PRIMITIVE is the GL mode last set by radeonRenderPrimitive().
 * TAG(x) is the identity here, so t_dd_unfilled.h defines the unfilled_tri
 * and unfilled_quad names referenced by UNFILLED_TRI / UNFILLED_QUAD.
 */
#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
#undef IND


/***********************************************************************
 *                      Generate GL render functions                   *
 ***********************************************************************/

/* One inclusion of t_dd_tritmp.h per IND value; each supplies the init
 * function (init, init_twoside, init_unfilled, init_twoside_unfilled) that
 * init_rast_tab() calls.
 * NOTE(review): the template presumably #undefs IND and TAG after each
 * inclusion, since both are redefined without an #undef here -- confirm.
 */

#define IND (0)
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"

#define IND (RADEON_TWOSIDE_BIT)
#define TAG(x) x##_twoside
#include "tnl_dd/t_dd_tritmp.h"

#define IND (RADEON_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"

#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
#define TAG(x) x##_twoside_unfilled
#include "tnl_dd/t_dd_tritmp.h"
729
730
/* Run the init function generated by each t_dd_tritmp.h instantiation
 * above.  Called once from radeonInitSwtcl().
 * NOTE(review): each call presumably fills the rast_tab slot for its IND
 * value -- confirm in t_dd_tritmp.h.
 */
static void init_rast_tab( void )
{
   init();
   init_twoside();
   init_unfilled();
   init_twoside_unfilled();
}
738
739/**********************************************************************/
740/*               Render unclipped begin/end objects                   */
741/**********************************************************************/
742
/* Hooks consumed by tnl/t_vb_rendertmp.h, which is included twice: once
 * addressing vertices directly (TAG radeon_..._verts, ELT(x) = x) and once
 * through the element array (TAG radeon_..._elts, ELT(x) = elt[x]).  The
 * resulting radeon_render_tab_verts / radeon_render_tab_elts tables are
 * installed by radeonChooseRenderState().
 */
#define RENDER_POINTS( start, count )		\
   for ( ; start < count ; start++)		\
      radeon_point( rmesa, VERT(start) )
#define RENDER_LINE( v0, v1 ) \
   radeon_line( rmesa, VERT(v0), VERT(v1) )
#define RENDER_TRI( v0, v1, v2 )  \
   radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
#define RENDER_QUAD( v0, v1, v2, v3 ) \
   radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
#undef INIT
#define INIT(x) do {					\
   radeonRenderPrimitive( ctx, x );			\
} while (0)
#undef LOCAL_VARS
/* vertsize (dwords) and radeonverts feed the VERT() macro; elt and stipple
 * feed ELT() and RESET_STIPPLE and are voided in case a variant leaves
 * them unused.
 */
#define LOCAL_VARS						\
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);		\
   const GLuint vertsize = rmesa->swtcl.vertex_size;		\
   const char *radeonverts = (char *)rmesa->swtcl.verts;		\
   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
   const GLboolean stipple = ctx->Line.StippleFlag;		\
   (void) elt; (void) stipple;
#define RESET_STIPPLE	if ( stipple ) radeonResetLineStipple( ctx );
#define RESET_OCCLUSION
/* NOTE(review): presumably keeps the template from undefining the macros
 * above so the second inclusion can reuse them -- confirm in the template.
 */
#define PRESERVE_VB_DEFS
#define ELT(x) (x)
#define TAG(x) radeon_##x##_verts
#include "tnl/t_vb_rendertmp.h"
#undef ELT
#undef TAG
#define TAG(x) radeon_##x##_elts
#define ELT(x) elt[x]
#include "tnl/t_vb_rendertmp.h"
775
776
777
778/**********************************************************************/
779/*                    Choose render functions                         */
780/**********************************************************************/
781
782void radeonChooseRenderState( GLcontext *ctx )
783{
784   TNLcontext *tnl = TNL_CONTEXT(ctx);
785   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
786   GLuint index = 0;
787   GLuint flags = ctx->_TriangleCaps;
788
789   if (!rmesa->TclFallback || rmesa->Fallback)
790      return;
791
792   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
793   if (flags & DD_TRI_UNFILLED)      index |= RADEON_UNFILLED_BIT;
794
795   if (index != rmesa->swtcl.RenderIndex) {
796      tnl->Driver.Render.Points = rast_tab[index].points;
797      tnl->Driver.Render.Line = rast_tab[index].line;
798      tnl->Driver.Render.ClippedLine = rast_tab[index].line;
799      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
800      tnl->Driver.Render.Quad = rast_tab[index].quad;
801
802      if (index == 0) {
803	 tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
804	 tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
805	 tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
806      } else {
807	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
808	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
809	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
810      }
811
812      rmesa->swtcl.RenderIndex = index;
813   }
814}
815
816
817/**********************************************************************/
818/*                 High level hooks for t_vb_render.c                 */
819/**********************************************************************/
820
821
822static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
823{
824   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
825
826   if (rmesa->swtcl.hw_primitive != hwprim) {
827      RADEON_NEWPRIM( rmesa );
828      rmesa->swtcl.hw_primitive = hwprim;
829   }
830}
831
832static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
833{
834   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
835   rmesa->swtcl.render_primitive = prim;
836   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
837      radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
838}
839
840static void radeonRenderFinish( GLcontext *ctx )
841{
842}
843
/* ResetLineStipple hook: marks the `lin` hardware state atom as changed.
 * NOTE(review): presumably the re-emitted line state is what restarts the
 * stipple pattern -- confirm against the state-emit code.
 */
static void radeonResetLineStipple( GLcontext *ctx )
{
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
   RADEON_STATECHANGE( rmesa, lin );
}
849
850
851/**********************************************************************/
852/*           Transition to/from hardware rasterization.               */
853/**********************************************************************/
854
855static const char * const fallbackStrings[] = {
856   "Texture mode",
857   "glDrawBuffer(GL_FRONT_AND_BACK)",
858   "glEnable(GL_STENCIL) without hw stencil buffer",
859   "glRenderMode(selection or feedback)",
860   "glBlendEquation",
861   "glBlendFunc",
862   "RADEON_NO_RAST",
863   "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
864};
865
866
867static const char *getFallbackString(GLuint bit)
868{
869   int i = 0;
870   while (bit > 1) {
871      i++;
872      bit >>= 1;
873   }
874   return fallbackStrings[i];
875}
876
877
878void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
879{
880   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
881   TNLcontext *tnl = TNL_CONTEXT(ctx);
882   GLuint oldfallback = rmesa->Fallback;
883
884   if (mode) {
885      rmesa->Fallback |= bit;
886      if (oldfallback == 0) {
887	 RADEON_FIREVERTICES( rmesa );
888	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
889	 _swsetup_Wakeup( ctx );
890	 _tnl_need_projected_coords( ctx, GL_TRUE );
891	 rmesa->swtcl.RenderIndex = ~0;
892         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
893            fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
894                    bit, getFallbackString(bit));
895         }
896      }
897   }
898   else {
899      rmesa->Fallback &= ~bit;
900      if (oldfallback == bit) {
901	 _swrast_flush( ctx );
902	 tnl->Driver.Render.Start = radeonRenderStart;
903	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
904	 tnl->Driver.Render.Finish = radeonRenderFinish;
905
906	 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
907	 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
908	 tnl->Driver.Render.Interp = _tnl_interp;
909
910	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
911	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
912	 if (rmesa->TclFallback) {
913	    /* These are already done if rmesa->TclFallback goes to
914	     * zero above. But not if it doesn't (RADEON_NO_TCL for
915	     * example?)
916	     */
917	    radeonChooseVertexState( ctx );
918	    radeonChooseRenderState( ctx );
919	 }
920         if (RADEON_DEBUG & DEBUG_FALLBACKS) {
921            fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
922                    bit, getFallbackString(bit));
923         }
924      }
925   }
926}
927
928
929void radeonFlushVertices( GLcontext *ctx, GLuint flags )
930{
931   _tnl_FlushVertices( ctx, flags );
932
933   if (flags & FLUSH_STORED_VERTICES)
934      RADEON_NEWPRIM( RADEON_CONTEXT( ctx ) );
935}
936
937/**********************************************************************/
938/*                            Initialization.                         */
939/**********************************************************************/
940
941void radeonInitSwtcl( GLcontext *ctx )
942{
943   TNLcontext *tnl = TNL_CONTEXT(ctx);
944   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
945   static int firsttime = 1;
946
947   if (firsttime) {
948      init_rast_tab();
949      firsttime = 0;
950   }
951
952   tnl->Driver.Render.Start = radeonRenderStart;
953   tnl->Driver.Render.Finish = radeonRenderFinish;
954   tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
955   tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
956   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
957   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
958   tnl->Driver.Render.Interp = _tnl_interp;
959
960   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
961		       RADEON_MAX_TNL_VERTEX_SIZE);
962
963   rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
964   rmesa->swtcl.RenderIndex = ~0;
965   rmesa->swtcl.render_primitive = GL_TRIANGLES;
966   rmesa->swtcl.hw_primitive = 0;
967}
968
969
970void radeonDestroySwtcl( GLcontext *ctx )
971{
972   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
973
974   if (rmesa->swtcl.indexed_verts.buf)
975      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
976			      __FUNCTION__ );
977}
978