radeon_tcl.c revision 26473140b9c5e6aa962961c836f79fd5aa6cd246
1/* $XFree86$ */
2/**************************************************************************
3
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     Tungsten Graphics Inc., Austin, Texas.
6
7All Rights Reserved.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice (including the
18next paragraph) shall be included in all copies or substantial
19portions of the Software.
20
21THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29**************************************************************************/
30
31/*
32 * Authors:
33 *   Keith Whitwell <keith@tungstengraphics.com>
34 */
35
#include <stdint.h>

#include "glheader.h"
#include "imports.h"
#include "light.h"
#include "mtypes.h"
#include "enums.h"

#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"

#include "radeon_context.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tex.h"
#include "radeon_tcl.h"
#include "radeon_swtcl.h"
#include "radeon_maos.h"
53
54
55
56/*
57 * Render unclipped vertex buffers by emitting vertices directly to
58 * dma buffers.  Use strip/fan hardware primitives where possible.
59 * Try to simulate missing primitives with indexed vertices.
60 */
/* Capability flags: 1 when the primitive has a native hardware type in
 * the HW_* table below, 0 when it has none (its HW_* entry is then 0).
 * HAVE_ELTS has no HW_* counterpart.
 */
#define HAVE_POINTS      1
#define HAVE_LINES       1
#define HAVE_LINE_LOOP   0   /* HW_LINE_LOOP is 0 */
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES   1
#define HAVE_TRI_STRIPS  1
#define HAVE_TRI_STRIP_1 0   /* HW_TRIANGLE_STRIP_1 is 0 */
#define HAVE_TRI_FANS    1
#define HAVE_QUADS       0   /* HW_QUADS is 0 */
#define HAVE_QUAD_STRIPS 0   /* HW_QUAD_STRIP is 0 */
#define HAVE_POLYGONS    1   /* drawn with the triangle fan type */
#define HAVE_ELTS        1


/* Hardware primitive type for each GL primitive. */
#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
#define HW_LINE_LOOP        0
#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
#define HW_TRIANGLE_STRIP_1 0
#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
#define HW_QUADS            0
#define HW_QUAD_STRIP       0
#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
86
87
88static GLboolean discrete_prim[0x10] = {
89   0,				/* 0 none */
90   1,				/* 1 points */
91   1,				/* 2 lines */
92   0,				/* 3 line_strip */
93   1,				/* 4 tri_list */
94   0,				/* 5 tri_fan */
95   0,				/* 6 tri_type2 */
96   1,				/* 7 rect list (unused) */
97   1,				/* 8 3vert point */
98   1,				/* 9 3vert line */
99   0,
100   0,
101   0,
102   0,
103   0,
104   0,
105};
106
107
/* Glue macros; presumably consumed by the tnl_dd/t_dd_dmatmp2.h template
 * included further down (none of them is expanded directly in this file).
 * They all expect `ctx` and/or `rmesa` to be in scope at the point of use.
 */
#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx)
#define ELT_TYPE  GLushort

/* Select an indexed-walk hardware primitive.  hw_prim is parenthesized so
 * that an expression argument (e.g. a conditional) is OR'ed as a whole.
 */
#define ELT_INIT(prim, hw_prim) \
   radeonTclPrimitive( ctx, prim, (hw_prim) | RADEON_CP_VC_CNTL_PRIM_WALK_IND )

#define GET_MESA_ELTS() rmesa->tcl.Elts


/* Don't really know how many elts will fit in what's left of cmdbuf,
 * as there is state to emit, etc.
 *
 * Testing on isosurf shows a maximum around here.  Don't know if it's
 * the card or driver or kernel module that is causing the behaviour.
 */
#define GET_MAX_HW_ELTS() 300


/* Mark the line state atom dirty and emit state immediately. */
#define RESET_STIPPLE() do {			\
   RADEON_STATECHANGE( rmesa, lin );		\
   radeonEmitState( rmesa );			\
} while (0)

/* Set (mode != 0) or clear (mode == 0) the auto-reset bit of the line
 * pattern register, then emit state immediately.
 */
#define AUTO_STIPPLE( mode )  do {		\
   RADEON_STATECHANGE( rmesa, lin );		\
   if (mode)					\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
	 RADEON_LINE_PATTERN_AUTO_RESET;	\
   else						\
      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
   radeonEmitState( rmesa );			\
} while (0)
142
143
144
145#define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
146
147static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr )
148{
149   if (rmesa->dma.flush)
150      rmesa->dma.flush( rmesa );
151
152   radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
153			   rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
154
155   radeonEmitAOS( rmesa,
156		rmesa->tcl.aos_components,
157		rmesa->tcl.nr_aos_components, 0 );
158
159   return radeonAllocEltsOpenEnded( rmesa,
160				    rmesa->tcl.vertex_format,
161				    rmesa->tcl.hw_primitive, nr );
162}
163
164#define CLOSE_ELTS()  RADEON_NEWPRIM( rmesa )
165
166
167
168/* TODO: Try to extend existing primitive if both are identical,
169 * discrete and there are no intervening state changes.  (Somewhat
170 * duplicates changes to DrawArrays code)
171 */
172static void radeonEmitPrim( GLcontext *ctx,
173		       GLenum prim,
174		       GLuint hwprim,
175		       GLuint start,
176		       GLuint count)
177{
178   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
179   radeonTclPrimitive( ctx, prim, hwprim );
180
181   radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
182			    rmesa->hw.max_state_size + VBUF_BUFSZ );
183
184   radeonEmitAOS( rmesa,
185		  rmesa->tcl.aos_components,
186		  rmesa->tcl.nr_aos_components,
187		  start );
188
189   /* Why couldn't this packet have taken an offset param?
190    */
191   radeonEmitVbufPrim( rmesa,
192		       rmesa->tcl.vertex_format,
193		       rmesa->tcl.hw_primitive,
194		       count - start );
195}
196
/* Forward to radeonEmitPrim; the (void) cast keeps `rmesa` referenced at
 * the expansion site.
 */
#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
   (void) rmesa; } while (0)

/* Try & join small primitives: true for fewer than 20 elements, or for
 * fewer than 40 when the current hardware primitive is already PRIM in
 * indexed-walk, TCL-enabled form.
 */
#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
  ((NR) < 20 ||							\
   ((NR) < 40 &&						\
    rmesa->tcl.hw_primitive == (PRIM|				\
			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
213
/* EMIT_ELT stores one 16-bit element at dest[offset].
 *
 * On big-endian builds the two 16-bit halves of every 32-bit word are
 * written swapped: dest is rounded down to a 4-byte boundary and the
 * index is adjusted so that even and odd slots trade places.  The
 * pointer is inspected through uintptr_t; going through a 32-bit GLuint
 * would truncate the address on 64-bit targets.
 */
#ifdef MESA_BIG_ENDIAN
/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
#define EMIT_ELT(dest, offset, x) do {					\
	int off = (offset) + (int)( ( (uintptr_t)(dest) & 0x2 ) >> 1 );	\
	GLushort *des = (GLushort *)( (uintptr_t)(dest) & ~(uintptr_t)0x2 ); \
	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 		\
	(void)rmesa; } while (0)
#else
#define EMIT_ELT(dest, offset, x) do {				\
	(dest)[offset] = (GLushort) (x);			\
	(void)rmesa; } while (0)
#endif

/* Store two elements with a single 32-bit write: x in the low half, y in
 * the high half.  The trailing semicolon is kept so that existing uses,
 * with or without their own semicolon, still compile.
 */
#define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)((dest)+(offset)) = ((y)<<16)|(x);
228
229
230
231#define TAG(x) tcl_##x
232#include "tnl_dd/t_dd_dmatmp2.h"
233
234/**********************************************************************/
235/*                          External entrypoints                     */
236/**********************************************************************/
237
238void radeonEmitPrimitive( GLcontext *ctx,
239			  GLuint first,
240			  GLuint last,
241			  GLuint flags )
242{
243   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
244}
245
246void radeonEmitEltPrimitive( GLcontext *ctx,
247			     GLuint first,
248			     GLuint last,
249			     GLuint flags )
250{
251   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );
252}
253
254void radeonTclPrimitive( GLcontext *ctx,
255			 GLenum prim,
256			 int hw_prim )
257{
258   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
259   GLuint se_cntl;
260   GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
261
262   if (newprim != rmesa->tcl.hw_primitive ||
263       !discrete_prim[hw_prim&0xf]) {
264      RADEON_NEWPRIM( rmesa );
265      rmesa->tcl.hw_primitive = newprim;
266   }
267
268   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
269   se_cntl &= ~RADEON_FLAT_SHADE_VTX_LAST;
270
271   if (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
272      se_cntl |= RADEON_FLAT_SHADE_VTX_0;
273   else
274      se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
275
276   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
277      RADEON_STATECHANGE( rmesa, set );
278      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
279   }
280}
281
282/**********************************************************************/
283/*             Fog blend factor computation for hw tcl                */
284/*             same calculation used as in t_vb_fog.c                 */
285/**********************************************************************/
286
287#define FOG_EXP_TABLE_SIZE 256
288#define FOG_MAX (10.0)
289#define EXP_FOG_MAX .0006595
290#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
291static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
292
293#if 1
294#define NEG_EXP( result, narg )						\
295do {									\
296   GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR));			\
297   GLint k = (GLint) f;							\
298   if (k > FOG_EXP_TABLE_SIZE-2) 					\
299      result = (GLfloat) EXP_FOG_MAX;					\
300   else									\
301      result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]);	\
302} while (0)
303#else
304#define NEG_EXP( result, narg )					\
305do {								\
306   result = exp(-narg);						\
307} while (0)
308#endif
309
310
311/**
312 * Initialize the exp_table[] lookup table for approximating exp().
313 */
314void
315radeonInitStaticFogData( void )
316{
317   GLfloat f = 0.0F;
318   GLint i = 0;
319   for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
320      exp_table[i] = (GLfloat) exp(-f);
321   }
322}
323
324
325/**
326 * Compute per-vertex fog blend factors from fog coordinates by
327 * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
328 * Fog coordinates are distances from the eye (typically between the
329 * near and far clip plane distances).
330 * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
331 * Fog blend factors are in the range [0,1].
332 */
333float
334radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
335{
336   GLfloat end  = ctx->Fog.End;
337   GLfloat d, temp;
338   const GLfloat z = FABSF(fogcoord);
339
340   switch (ctx->Fog.Mode) {
341   case GL_LINEAR:
342      if (ctx->Fog.Start == ctx->Fog.End)
343         d = 1.0F;
344      else
345         d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
346      temp = (end - z) * d;
347      return CLAMP(temp, 0.0F, 1.0F);
348      break;
349   case GL_EXP:
350      d = ctx->Fog.Density;
351      NEG_EXP( temp, d * z );
352      return temp;
353      break;
354   case GL_EXP2:
355      d = ctx->Fog.Density*ctx->Fog.Density;
356      NEG_EXP( temp, d * z * z );
357      return temp;
358      break;
359   default:
360      _mesa_problem(ctx, "Bad fog mode in make_fog_coord");
361      return 0;
362   }
363}
364
365/**********************************************************************/
366/*                          Render pipeline stage                     */
367/**********************************************************************/
368
369
370/* TCL render.
371 */
372static GLboolean radeon_run_tcl_render( GLcontext *ctx,
373					struct tnl_pipeline_stage *stage )
374{
375   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
376   TNLcontext *tnl = TNL_CONTEXT(ctx);
377   struct vertex_buffer *VB = &tnl->vb;
378   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
379   GLuint i;
380
381   /* TODO: separate this from the swtnl pipeline
382    */
383   if (rmesa->TclFallback)
384      return GL_TRUE;	/* fallback to software t&l */
385
386   if (VB->Count == 0)
387      return GL_FALSE;
388
389   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
390    * inputs.
391    */
392   if (ctx->Light.Enabled) {
393      inputs |= VERT_BIT_NORMAL;
394   }
395
396   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
397      inputs |= VERT_BIT_COLOR1;
398   }
399
400   if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
401      inputs |= VERT_BIT_FOG;
402   }
403
404   for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
405      if (ctx->Texture.Unit[i]._ReallyEnabled) {
406      /* TODO: probably should not emit texture coords when texgen is enabled */
407	 if (rmesa->TexGenNeedNormals[i]) {
408	    inputs |= VERT_BIT_NORMAL;
409	 }
410	 inputs |= VERT_BIT_TEX(i);
411      }
412   }
413
414   radeonReleaseArrays( ctx, ~0 );
415   radeonEmitArrays( ctx, inputs );
416
417   rmesa->tcl.Elts = VB->Elts;
418
419   for (i = 0 ; i < VB->PrimitiveCount ; i++)
420   {
421      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
422      GLuint start = VB->Primitive[i].start;
423      GLuint length = VB->Primitive[i].count;
424
425      if (!length)
426	 continue;
427
428      if (rmesa->tcl.Elts)
429	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
430      else
431	 radeonEmitPrimitive( ctx, start, start+length, prim );
432   }
433
434   return GL_FALSE;		/* finished the pipe */
435}
436
437
438
439/* Initial state for tcl stage.
440 */
441const struct tnl_pipeline_stage _radeon_tcl_stage =
442{
443   "radeon render",
444   NULL,
445   NULL,
446   NULL,
447   NULL,
448   radeon_run_tcl_render	/* run */
449};
450
451
452
453/**********************************************************************/
454/*                 Validate state at pipeline start                   */
455/**********************************************************************/
456
457
458/*-----------------------------------------------------------------------
459 * Manage TCL fallbacks
460 */
461
462
463static void transition_to_swtnl( GLcontext *ctx )
464{
465   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
466   TNLcontext *tnl = TNL_CONTEXT(ctx);
467   GLuint se_cntl;
468
469   RADEON_NEWPRIM( rmesa );
470   rmesa->swtcl.vertex_format = 0;
471
472   radeonChooseVertexState( ctx );
473   radeonChooseRenderState( ctx );
474
475   _mesa_validate_all_lighting_tables( ctx );
476
477   tnl->Driver.NotifyMaterialChange =
478      _mesa_validate_all_lighting_tables;
479
480   radeonReleaseArrays( ctx, ~0 );
481
482   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
483   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;
484
485   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
486      RADEON_STATECHANGE( rmesa, set );
487      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
488   }
489}
490
491
492static void transition_to_hwtnl( GLcontext *ctx )
493{
494   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
495   TNLcontext *tnl = TNL_CONTEXT(ctx);
496   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
497
498   se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
499		     RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
500		     RADEON_VTX_W0_IS_NOT_1_OVER_W0);
501   se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
502
503   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
504      RADEON_STATECHANGE( rmesa, set );
505      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
506      _tnl_need_projected_coords( ctx, GL_FALSE );
507   }
508
509   radeonUpdateMaterial( ctx );
510
511   tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
512
513   if ( rmesa->dma.flush )
514      rmesa->dma.flush( rmesa );
515
516   rmesa->dma.flush = NULL;
517   rmesa->swtcl.vertex_format = 0;
518
519   if (rmesa->swtcl.indexed_verts.buf)
520      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
521			      __FUNCTION__ );
522
523   if (RADEON_DEBUG & DEBUG_FALLBACKS)
524      fprintf(stderr, "Radeon end tcl fallback\n");
525}
526
527static char *fallbackStrings[] = {
528   "Rasterization fallback",
529   "Unfilled triangles",
530   "Twosided lighting, differing materials",
531   "Materials in VB (maybe between begin/end)",
532   "Texgen unit 0",
533   "Texgen unit 1",
534   "Texgen unit 2",
535   "User disable",
536   "Fogcoord with separate specular lighting"
537};
538
539
540static char *getFallbackString(GLuint bit)
541{
542   int i = 0;
543   while (bit > 1) {
544      i++;
545      bit >>= 1;
546   }
547   return fallbackStrings[i];
548}
549
550
551
552void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
553{
554   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
555   GLuint oldfallback = rmesa->TclFallback;
556
557   if (mode) {
558      rmesa->TclFallback |= bit;
559      if (oldfallback == 0) {
560	 if (RADEON_DEBUG & DEBUG_FALLBACKS)
561	    fprintf(stderr, "Radeon begin tcl fallback %s\n",
562		    getFallbackString( bit ));
563	 transition_to_swtnl( ctx );
564      }
565   }
566   else {
567      rmesa->TclFallback &= ~bit;
568      if (oldfallback == bit) {
569	 if (RADEON_DEBUG & DEBUG_FALLBACKS)
570	    fprintf(stderr, "Radeon end tcl fallback %s\n",
571		    getFallbackString( bit ));
572	 transition_to_hwtnl( ctx );
573      }
574   }
575}
576