/*
 * Copyright 2003 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@vmware.com>
 */

#include <stdio.h>
#include "main/glheader.h"
#include "main/context.h"
#include "swrast/s_chan.h"
#include "t_context.h"
#include "t_vertex.h"

#define DBG 0

/* Build and manage clipspace/ndc/window vertices.
 */

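/* Check whether the current vertex layout matches a previously
 * registered fastpath.  Attribute formats, input sizes and output
 * offsets must all agree; input strides are compared only when the
 * fastpath was registered with match_strides set.
 */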
static GLboolean match_fastpath( struct tnl_clipspace *vtx,
				 const struct tnl_clipspace_fastpath *fp)
{
   GLuint j;

   if (vtx->attr_count != fp->attr_count)
      return GL_FALSE;

   for (j = 0; j < vtx->attr_count; j++)
      if (vtx->attr[j].format != fp->attr[j].format ||
	  vtx->attr[j].inputsize != fp->attr[j].size ||
	  vtx->attr[j].vertoffset != fp->attr[j].offset)
	 return GL_FALSE;

   if (fp->match_strides) {
      if (vtx->vertex_size != fp->vertex_size)
	 return GL_FALSE;

      for (j = 0; j < vtx->attr_count; j++)
	 if (vtx->attr[j].inputstride != fp->attr[j].stride)
	    return GL_FALSE;
   }

   return GL_TRUE;
}

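/* Search the fastpath list for one matching the current layout and, if
 * found, install its emit function.  A stored NULL func marks a layout
 * already known to be unsuitable for codegen.
 */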
static GLboolean search_fastpath_emit( struct tnl_clipspace *vtx )
{
   struct tnl_clipspace_fastpath *fp = vtx->fastpath;

   for ( ; fp ; fp = fp->next) {
      if (match_fastpath(vtx, fp)) {
         vtx->emit = fp->func;
	 return GL_TRUE;
      }
   }

   return GL_FALSE;
}

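/* Record the current layout and its emit function (typically one just
 * produced by codegen) at the head of the fastpath list so that later
 * layout changes can reuse it.
 */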
void _tnl_register_fastpath( struct tnl_clipspace *vtx,
			     GLboolean match_strides )
{
   struct tnl_clipspace_fastpath *fastpath = CALLOC_STRUCT(tnl_clipspace_fastpath);
   GLuint i;

   if (fastpath == NULL) {
      _mesa_error_no_memory(__func__);
      return;
   }

   fastpath->vertex_size = vtx->vertex_size;
   fastpath->attr_count = vtx->attr_count;
   fastpath->match_strides = match_strides;
   fastpath->func = vtx->emit;
   fastpath->attr = malloc(vtx->attr_count * sizeof(fastpath->attr[0]));

   if (fastpath->attr == NULL) {
      free(fastpath);
      _mesa_error_no_memory(__func__);
      return;
   }

   for (i = 0; i < vtx->attr_count; i++) {
      fastpath->attr[i].format = vtx->attr[i].format;
      fastpath->attr[i].stride = vtx->attr[i].inputstride;
      fastpath->attr[i].size = vtx->attr[i].inputsize;
      fastpath->attr[i].offset = vtx->attr[i].vertoffset;
   }

   fastpath->next = vtx->fastpath;
   vtx->fastpath = fastpath;
}



/***********************************************************************
 * Build codegen functions or return generic ones:
 */
static void choose_emit_func( struct gl_context *ctx, GLuint count, GLubyte *dest)
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint attr_count = vtx->attr_count;
   GLuint j;

   for (j = 0; j < attr_count; j++) {
      GLvector4f *vptr = VB->AttribPtr[a[j].attrib];
      a[j].inputstride = vptr->stride;
      a[j].inputsize = vptr->size;
      a[j].emit = a[j].insert[vptr->size - 1]; /* not always used */
   }

   vtx->emit = NULL;

   /* Does this match an existing (hardwired, codegen or known-bad)
    * fastpath?
    */
   if (search_fastpath_emit(vtx)) {
      /* Use this result.  If it is null, then it is already known
       * that the current state will fail for codegen and there is no
       * point trying again.
       */
   }
   else if (vtx->codegen_emit) {
      vtx->codegen_emit(ctx);
   }

   if (!vtx->emit) {
      _tnl_generate_hardwired_emit(ctx);
   }

   /* Otherwise use the generic version:
    */
   if (!vtx->emit)
      vtx->emit = _tnl_generic_emit;

   vtx->emit( ctx, count, dest );
}


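/* Choose between the generic interpolation functions.  The "extras"
 * variant also handles back-face colors and edge flags, which are only
 * present for two-sided lighting or unfilled polygons.
 */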
static void choose_interp_func( struct gl_context *ctx,
				GLfloat t,
				GLuint edst, GLuint eout, GLuint ein,
				GLboolean force_boundary )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL);
   GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;

   if (vtx->need_extras && (twosided || unfilled)) {
      vtx->interp = _tnl_generic_interp_extras;
   } else {
      vtx->interp = _tnl_generic_interp;
   }

   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
}


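/* Likewise choose the provoking-vertex copy function.
 */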
static void choose_copy_pv_func(  struct gl_context *ctx, GLuint edst, GLuint esrc )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLboolean unfilled = (ctx->Polygon.FrontMode != GL_FILL ||
                         ctx->Polygon.BackMode != GL_FILL);

   GLboolean twosided = ctx->Light.Enabled && ctx->Light.Model.TwoSide;

   if (vtx->need_extras && (twosided || unfilled)) {
      vtx->copy_pv = _tnl_generic_copy_pv_extras;
   } else {
      vtx->copy_pv = _tnl_generic_copy_pv;
   }

   vtx->copy_pv( ctx, edst, esrc );
}


/***********************************************************************
 * Public entrypoints, mostly dispatch to the above:
 */


/* Interpolate between two vertices to produce a third:
 */
void _tnl_interp( struct gl_context *ctx,
		  GLfloat t,
		  GLuint edst, GLuint eout, GLuint ein,
		  GLboolean force_boundary )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   vtx->interp( ctx, t, edst, eout, ein, force_boundary );
}

/* Copy colors from one vertex to another:
 */
void _tnl_copy_pv(  struct gl_context *ctx, GLuint edst, GLuint esrc )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   vtx->copy_pv( ctx, edst, esrc );
}


/* Extract a named attribute from a hardware vertex.  Will have to
 * reverse any viewport transformation, swizzling or other conversions
 * which may have been applied:
 */
void _tnl_get_attr( struct gl_context *ctx, const void *vin,
			      GLenum attr, GLfloat *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint attr_count = vtx->attr_count;
   GLuint j;

   for (j = 0; j < attr_count; j++) {
      if (a[j].attrib == attr) {
	 a[j].extract( &a[j], dest, (GLubyte *)vin + a[j].vertoffset );
	 return;
      }
   }

   /* Else return the value from ctx->Current.
    */
   if (attr == _TNL_ATTRIB_POINTSIZE) {
      /* If the hardware vertex doesn't have point size then use size from
       * struct gl_context.  XXX this will be wrong if drawing attenuated points!
       */
      dest[0] = ctx->Point.Size;
   }
   else {
      memcpy( dest, ctx->Current.Attrib[attr], 4*sizeof(GLfloat));
   }
}

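/* Example (hypothetical): recovering the position attribute from an
 * emitted hardware vertex, e.g. while debugging an emit fastpath:
 *
 *    GLfloat pos[4];
 *    void *v = _tnl_get_vertex(ctx, i);
 *    _tnl_get_attr(ctx, v, _TNL_ATTRIB_POS, pos);
 */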

/* Complementary operation to the above.
 */
void _tnl_set_attr( struct gl_context *ctx, void *vout,
		    GLenum attr, const GLfloat *src )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   const struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint attr_count = vtx->attr_count;
   GLuint j;

   for (j = 0; j < attr_count; j++) {
      if (a[j].attrib == attr) {
	 a[j].insert[4-1]( &a[j], (GLubyte *)vout + a[j].vertoffset, src );
	 return;
      }
   }
}


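/* Return a pointer to the nr'th vertex in the clipspace vertex buffer.
 */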
void *_tnl_get_vertex( struct gl_context *ctx, GLuint nr )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   return vtx->vertex_buf + nr * vtx->vertex_size;
}

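/* State change callback: when two-sided lighting or polygon fill mode
 * changes, the interp/copy_pv choices above may change too, so fall
 * back to the choosers and mark all inputs stale.
 */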
void _tnl_invalidate_vertex_state( struct gl_context *ctx, GLuint new_state )
{
   /* if two-sided lighting changes or filled/unfilled polygon state changes */
   if (new_state & (_NEW_LIGHT | _NEW_POLYGON) ) {
      struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
      vtx->new_inputs = ~0;
      vtx->interp = choose_interp_func;
      vtx->copy_pv = choose_copy_pv_func;
   }
}

static void invalidate_funcs( struct tnl_clipspace *vtx )
{
   vtx->emit = choose_emit_func;
   vtx->interp = choose_interp_func;
   vtx->copy_pv = choose_copy_pv_func;
   vtx->new_inputs = ~0;
}

GLuint _tnl_install_attrs( struct gl_context *ctx, const struct tnl_attr_map *map,
			   GLuint nr, const GLfloat *vp,
			   GLuint unpacked_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLuint offset = 0;
   GLuint i, j;

   assert(nr < _TNL_ATTRIB_MAX);
   assert(nr == 0 || map[0].attrib == VERT_ATTRIB_POS);

   vtx->new_inputs = ~0;
   vtx->need_viewport = GL_FALSE;

   if (vp) {
      vtx->need_viewport = GL_TRUE;
   }

   for (j = 0, i = 0; i < nr; i++) {
      const GLuint format = map[i].format;
      if (format == EMIT_PAD) {
	 if (DBG)
	    printf("%d: pad %d, offset %d\n", i,
		   map[i].offset, offset);

	 offset += map[i].offset;

      }
      else {
	 GLuint tmpoffset;

	 if (unpacked_size)
	    tmpoffset = map[i].offset;
	 else
	    tmpoffset = offset;

	 if (vtx->attr_count != j ||
	     vtx->attr[j].attrib != map[i].attrib ||
	     vtx->attr[j].format != format ||
	     vtx->attr[j].vertoffset != tmpoffset) {
	    invalidate_funcs(vtx);

	    vtx->attr[j].attrib = map[i].attrib;
	    vtx->attr[j].format = format;
	    vtx->attr[j].vp = vp;
	    vtx->attr[j].insert = _tnl_format_info[format].insert;
	    vtx->attr[j].extract = _tnl_format_info[format].extract;
	    vtx->attr[j].vertattrsize = _tnl_format_info[format].attrsize;
	    vtx->attr[j].vertoffset = tmpoffset;
	 }


	 if (DBG)
	    printf("%d: %s, vp %p, offset %d\n", i,
		   _tnl_format_info[format].name, (void *)vp,
		   vtx->attr[j].vertoffset);

	 offset += _tnl_format_info[format].attrsize;
	 j++;
      }
   }

   vtx->attr_count = j;

   if (unpacked_size)
      vtx->vertex_size = unpacked_size;
   else
      vtx->vertex_size = offset;

   assert(vtx->vertex_size <= vtx->max_vertex_size);
   return vtx->vertex_size;
}

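/* Example (hypothetical driver usage): install a position + color
 * layout.  Field order in struct tnl_attr_map is assumed to be
 * { attrib, format, offset }; the viewport matrix argument is only
 * needed when the hardware consumes window coordinates:
 *
 *    static const struct tnl_attr_map map[] = {
 *       { VERT_ATTRIB_POS,    EMIT_4F_VIEWPORT, 0 },
 *       { VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, 0 },
 *    };
 *    GLuint vsize = _tnl_install_attrs(ctx, map, 2,
 *                                      ctx->Viewport._WindowMap.m, 0);
 */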


void _tnl_invalidate_vertices( struct gl_context *ctx, GLuint newinputs )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   vtx->new_inputs |= newinputs;
}


/* This event has broader use beyond this file - will move elsewhere
 * and probably invoke a driver callback.
 */
void _tnl_notify_pipeline_output_change( struct gl_context *ctx )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   invalidate_funcs(vtx);
}

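/* Step each attribute's input pointer from the previously emitted
 * vertex to the next element index.  The emit functions advance the
 * pointers by one vertex as they run, hence the diff - 1.
 */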
static void adjust_input_ptrs( struct gl_context *ctx, GLint diff)
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint count = vtx->attr_count;
   GLuint j;

   diff -= 1;
   for (j = 0; j < count; ++j) {
      const GLvector4f *vptr = VB->AttribPtr[a->attrib];
      (a++)->inputptr += diff * vptr->stride;
   }
}

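/* Point each attribute at the VB array data for the given start
 * vertex, and latch the viewport scale and translate if any attribute
 * needs the viewport transform.
 */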
static void update_input_ptrs( struct gl_context *ctx, GLuint start )
{
   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   struct tnl_clipspace_attr *a = vtx->attr;
   const GLuint count = vtx->attr_count;
   GLuint j;

   for (j = 0; j < count; j++) {
      GLvector4f *vptr = VB->AttribPtr[a[j].attrib];

      if (vtx->emit != choose_emit_func) {
	 assert(a[j].inputstride == vptr->stride);
	 assert(a[j].inputsize == vptr->size);
      }

      a[j].inputptr = ((GLubyte *)vptr->data) + start * vptr->stride;
   }

   if (a->vp) {
      vtx->vp_scale[0] = a->vp[MAT_SX];
      vtx->vp_scale[1] = a->vp[MAT_SY];
      vtx->vp_scale[2] = a->vp[MAT_SZ];
      vtx->vp_scale[3] = 1.0;
      vtx->vp_xlate[0] = a->vp[MAT_TX];
      vtx->vp_xlate[1] = a->vp[MAT_TY];
      vtx->vp_xlate[2] = a->vp[MAT_TZ];
      vtx->vp_xlate[3] = 0.0;
   }
}


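/* Build hardware vertices start..end from the VB arrays into the
 * clipspace vertex buffer, choosing an emit function on first use.
 */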
void _tnl_build_vertices( struct gl_context *ctx,
			  GLuint start,
			  GLuint end,
			  GLuint newinputs )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   update_input_ptrs( ctx, start );
   vtx->emit( ctx, end - start,
	      (GLubyte *)(vtx->vertex_buf +
			  start * vtx->vertex_size));
}

/* Emit VB vertices start..end to dest.  Note that the VB vertex at
 * position start will be emitted to dest at position zero.
 */
void *_tnl_emit_vertices_to_buffer( struct gl_context *ctx,
				    GLuint start,
				    GLuint end,
				    void *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   update_input_ptrs(ctx, start);
   /* Note: dest should not be adjusted for non-zero 'start' values:
    */
   vtx->emit( ctx, end - start, (GLubyte*) dest );
   return (void *)((GLubyte *)dest + vtx->vertex_size * (end - start));
}

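/* Example (hypothetical): filling a driver DMA buffer in one call,
 * assuming 'dma' has room for at least (end - start) vertices:
 *
 *    void *tail = _tnl_emit_vertices_to_buffer(ctx, start, end, dma);
 *    GLuint bytes = (GLubyte *)tail - (GLubyte *)dma;
 */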
/* Emit indexed VB vertices start..end to dest.  Note that the VB vertex
 * at position start will be emitted to dest at position zero.
 */

void *_tnl_emit_indexed_vertices_to_buffer( struct gl_context *ctx,
					    const GLuint *elts,
					    GLuint start,
					    GLuint end,
					    void *dest )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
   GLuint oldIndex;
   GLubyte *cdest = dest;

   /* Emit the first vertex, basing the input pointers on its index */
   update_input_ptrs(ctx, oldIndex = elts[start++]);
   vtx->emit( ctx, 1, cdest );
   cdest += vtx->vertex_size;

   /* Then step the input pointers by each index delta in turn */
   for (; start < end; ++start) {
      adjust_input_ptrs(ctx, elts[start] - oldIndex);
      oldIndex = elts[start];
      vtx->emit( ctx, 1, cdest);
      cdest += vtx->vertex_size;
   }

   return (void *) cdest;
}


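/* One-time setup: allocate (or grow) the vertex buffer, set the color
 * channel scale factors for swrast's CHAN_TYPE, and enable SSE codegen
 * where built in and not disabled via MESA_NO_CODEGEN.
 */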
void _tnl_init_vertices( struct gl_context *ctx,
			GLuint vb_size,
			GLuint max_vertex_size )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);

   _tnl_install_attrs( ctx, NULL, 0, NULL, 0 );

   vtx->need_extras = GL_TRUE;
   if (max_vertex_size > vtx->max_vertex_size) {
      _tnl_free_vertices( ctx );
      vtx->max_vertex_size = max_vertex_size;
      vtx->vertex_buf = _mesa_align_calloc(vb_size * max_vertex_size, 32 );
      invalidate_funcs(vtx);
   }

   switch(CHAN_TYPE) {
   case GL_UNSIGNED_BYTE:
      vtx->chan_scale[0] = 255.0;
      vtx->chan_scale[1] = 255.0;
      vtx->chan_scale[2] = 255.0;
      vtx->chan_scale[3] = 255.0;
      break;
   case GL_UNSIGNED_SHORT:
      vtx->chan_scale[0] = 65535.0;
      vtx->chan_scale[1] = 65535.0;
      vtx->chan_scale[2] = 65535.0;
      vtx->chan_scale[3] = 65535.0;
      break;
   default:
      vtx->chan_scale[0] = 1.0;
      vtx->chan_scale[1] = 1.0;
      vtx->chan_scale[2] = 1.0;
      vtx->chan_scale[3] = 1.0;
      break;
   }

   vtx->identity[0] = 0.0;
   vtx->identity[1] = 0.0;
   vtx->identity[2] = 0.0;
   vtx->identity[3] = 1.0;

   vtx->codegen_emit = NULL;

#ifdef USE_SSE_ASM
   if (!getenv("MESA_NO_CODEGEN"))
      vtx->codegen_emit = _tnl_generate_sse_emit;
#endif
}


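/* Free the vertex buffer and all registered fastpaths.
 */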
void _tnl_free_vertices( struct gl_context *ctx )
{
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   if (tnl) {
      struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
      struct tnl_clipspace_fastpath *fp, *tmp;

      _mesa_align_free(vtx->vertex_buf);
      vtx->vertex_buf = NULL;

      for (fp = vtx->fastpath ; fp ; fp = tmp) {
         tmp = fp->next;
         free(fp->attr);

         /* KW: At the moment, fp->func is constrained to be allocated by
          * _mesa_exec_alloc(), as the hardwired fastpaths in
          * t_vertex_generic.c are handled specially.  It would be nice
          * to unify them, but this probably won't change until this
          * module gets another overhaul.
          */
         _mesa_exec_free((void *) fp->func);
         free(fp);
      }

      vtx->fastpath = NULL;
   }
}