1
2/*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 *    Keith Whitwell <keithw@vmware.com>
27 */
28
29/* Split indexed primitives with per-vertex copying.
30 */
31
32#include <stdio.h>
33
34#include "main/glheader.h"
35#include "main/bufferobj.h"
36#include "main/imports.h"
37#include "main/glformats.h"
38#include "main/macros.h"
39#include "main/mtypes.h"
40
41#include "vbo_split.h"
42#include "vbo.h"
43
44
45#define ELT_TABLE_SIZE 16
46
47/**
48 * Used for vertex-level splitting of indexed buffers.  Note that
49 * non-indexed primitives may be converted to indexed in some cases
50 * (eg loops, fans) in order to use this splitting path.
51 */
52struct copy_context {
53
54   struct gl_context *ctx;
55   const struct gl_vertex_array **array;
56   const struct _mesa_prim *prim;
57   GLuint nr_prims;
58   const struct _mesa_index_buffer *ib;
59   vbo_draw_func draw;
60
61   const struct split_limits *limits;
62
63   struct {
64      GLuint attr;
65      GLuint size;
66      const struct gl_vertex_array *array;
67      const GLubyte *src_ptr;
68
69      struct gl_vertex_array dstarray;
70
71   } varying[VERT_ATTRIB_MAX];
72   GLuint nr_varying;
73
74   const struct gl_vertex_array *dstarray_ptr[VERT_ATTRIB_MAX];
75   struct _mesa_index_buffer dstib;
76
77   GLuint *translated_elt_buf;
78   const GLuint *srcelt;
79
80   /** A baby hash table to avoid re-emitting (some) duplicate
81    * vertices when splitting indexed primitives.
82    */
83   struct {
84      GLuint in;
85      GLuint out;
86   } vert_cache[ELT_TABLE_SIZE];
87
88   GLuint vertex_size;
89   GLubyte *dstbuf;
90   GLubyte *dstptr;     /**< dstptr == dstbuf + dstelt_max * vertsize */
91   GLuint dstbuf_size;  /**< in vertices */
92   GLuint dstbuf_nr;    /**< count of emitted vertices, also the largest value
93                         * in dstelt.  Our MaxIndex.
94                         */
95
96   GLuint *dstelt;
97   GLuint dstelt_nr;
98   GLuint dstelt_size;
99
100#define MAX_PRIM 32
101   struct _mesa_prim dstprim[MAX_PRIM];
102   GLuint dstprim_nr;
103
104};
105
106
107static GLuint attr_size( const struct gl_vertex_array *array )
108{
109   return array->Size * _mesa_sizeof_type(array->Type);
110}
111
112
113/**
114 * Starts returning true slightly before the buffer fills, to ensure
115 * that there is sufficient room for any remaining vertices to finish
116 * off the prim:
117 */
118static GLboolean
119check_flush( struct copy_context *copy )
120{
121   GLenum mode = copy->dstprim[copy->dstprim_nr].mode;
122
123   if (GL_TRIANGLE_STRIP == mode &&
124       copy->dstelt_nr & 1) { /* see bug9962 */
125       return GL_FALSE;
126   }
127
128   if (copy->dstbuf_nr + 4 > copy->dstbuf_size)
129      return GL_TRUE;
130
131   if (copy->dstelt_nr + 4 > copy->dstelt_size)
132      return GL_TRUE;
133
134   return GL_FALSE;
135}
136
137
138/**
139 * Dump the parameters/info for a vbo->draw() call.
140 */
141static void
142dump_draw_info(struct gl_context *ctx,
143               const struct gl_vertex_array **arrays,
144               const struct _mesa_prim *prims,
145               GLuint nr_prims,
146               const struct _mesa_index_buffer *ib,
147               GLuint min_index,
148               GLuint max_index)
149{
150   GLuint i, j;
151
152   printf("VBO Draw:\n");
153   for (i = 0; i < nr_prims; i++) {
154      printf("Prim %u of %u\n", i, nr_prims);
155      printf("  Prim mode 0x%x\n", prims[i].mode);
156      printf("  IB: %p\n", (void*) ib);
157      for (j = 0; j < VERT_ATTRIB_MAX; j++) {
158         printf("    array %d at %p:\n", j, (void*) arrays[j]);
159         printf("      ptr %p, size %d, type 0x%x, stride %d\n",
160		arrays[j]->Ptr,
161		arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB);
162         if (0) {
163            GLint k = prims[i].start + prims[i].count - 1;
164            GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->StrideB * k);
165            printf("        last: %f %f %f\n",
166		   last[0], last[1], last[2]);
167         }
168      }
169   }
170}
171
172
173static void
174flush( struct copy_context *copy )
175{
176   struct gl_context *ctx = copy->ctx;
177   const struct gl_vertex_array **saved_arrays = ctx->Array._DrawArrays;
178   GLuint i;
179
180   /* Set some counters:
181    */
182   copy->dstib.count = copy->dstelt_nr;
183
184#if 0
185   dump_draw_info(copy->ctx,
186                  copy->dstarray_ptr,
187                  copy->dstprim,
188                  copy->dstprim_nr,
189                  &copy->dstib,
190                  0,
191                  copy->dstbuf_nr);
192#else
193   (void) dump_draw_info;
194#endif
195
196   ctx->Array._DrawArrays = copy->dstarray_ptr;
197   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
198
199   copy->draw( ctx,
200	       copy->dstprim,
201	       copy->dstprim_nr,
202	       &copy->dstib,
203	       GL_TRUE,
204	       0,
205	       copy->dstbuf_nr - 1,
206	       NULL, 0, NULL );
207
208   ctx->Array._DrawArrays = saved_arrays;
209   ctx->NewDriverState |= ctx->DriverFlags.NewArray;
210
211   /* Reset all pointers:
212    */
213   copy->dstprim_nr = 0;
214   copy->dstelt_nr = 0;
215   copy->dstbuf_nr = 0;
216   copy->dstptr = copy->dstbuf;
217
218   /* Clear the vertex cache:
219    */
220   for (i = 0; i < ELT_TABLE_SIZE; i++)
221      copy->vert_cache[i].in = ~0;
222}
223
224
225/**
226 * Called at begin of each primitive during replay.
227 */
228static void
229begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag )
230{
231   struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
232
233   prim->mode = mode;
234   prim->begin = begin_flag;
235   prim->num_instances = 1;
236}
237
238
239/**
240 * Use a hashtable to attempt to identify recently-emitted vertices
241 * and avoid re-emitting them.
242 */
243static GLuint
244elt(struct copy_context *copy, GLuint elt_idx)
245{
246   GLuint elt = copy->srcelt[elt_idx] + copy->prim->basevertex;
247   GLuint slot = elt & (ELT_TABLE_SIZE-1);
248
249/*    printf("elt %d\n", elt); */
250
251   /* Look up the incoming element in the vertex cache.  Re-emit if
252    * necessary.
253    */
254   if (copy->vert_cache[slot].in != elt) {
255      GLubyte *csr = copy->dstptr;
256      GLuint i;
257
258/*       printf("  --> emit to dstelt %d\n", copy->dstbuf_nr); */
259
260      for (i = 0; i < copy->nr_varying; i++) {
261	 const struct gl_vertex_array *srcarray = copy->varying[i].array;
262	 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB;
263
264	 memcpy(csr, srcptr, copy->varying[i].size);
265	 csr += copy->varying[i].size;
266
267#ifdef NAN_CHECK
268         if (srcarray->Type == GL_FLOAT) {
269            GLuint k;
270            GLfloat *f = (GLfloat *) srcptr;
271            for (k = 0; k < srcarray->Size; k++) {
272               assert(!IS_INF_OR_NAN(f[k]));
273               assert(f[k] <= 1.0e20 && f[k] >= -1.0e20);
274            }
275         }
276#endif
277
278	 if (0)
279	 {
280	    const GLuint *f = (const GLuint *)srcptr;
281	    GLuint j;
282	    printf("  varying %d: ", i);
283	    for(j = 0; j < copy->varying[i].size / 4; j++)
284	       printf("%x ", f[j]);
285	    printf("\n");
286	 }
287      }
288
289      copy->vert_cache[slot].in = elt;
290      copy->vert_cache[slot].out = copy->dstbuf_nr++;
291      copy->dstptr += copy->vertex_size;
292
293      assert(csr == copy->dstptr);
294      assert(copy->dstptr == (copy->dstbuf +
295                              copy->dstbuf_nr * copy->vertex_size));
296   }
297/*    else */
298/*       printf("  --> reuse vertex\n"); */
299
300/*    printf("  --> emit %d\n", copy->vert_cache[slot].out); */
301   copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out;
302   return check_flush(copy);
303}
304
305
306/**
307 * Called at end of each primitive during replay.
308 */
309static void
310end( struct copy_context *copy, GLboolean end_flag )
311{
312   struct _mesa_prim *prim = &copy->dstprim[copy->dstprim_nr];
313
314/*    printf("end (%d)\n", end_flag); */
315
316   prim->end = end_flag;
317   prim->count = copy->dstelt_nr - prim->start;
318
319   if (++copy->dstprim_nr == MAX_PRIM ||
320       check_flush(copy))
321      flush(copy);
322}
323
324
325static void
326replay_elts( struct copy_context *copy )
327{
328   GLuint i, j, k;
329   GLboolean split;
330
331   for (i = 0; i < copy->nr_prims; i++) {
332      const struct _mesa_prim *prim = &copy->prim[i];
333      const GLuint start = prim->start;
334      GLuint first, incr;
335
336      switch (prim->mode) {
337
338      case GL_LINE_LOOP:
339	 /* Convert to linestrip and emit the final vertex explicitly,
340	  * but only in the resultant strip that requires it.
341	  */
342	 j = 0;
343	 while (j != prim->count) {
344	    begin(copy, GL_LINE_STRIP, prim->begin && j == 0);
345
346	    for (split = GL_FALSE; j != prim->count && !split; j++)
347	       split = elt(copy, start + j);
348
349	    if (j == prim->count) {
350	       /* Done, emit final line.  Split doesn't matter as
351		* it is always raised a bit early so we can emit
352		* the last verts if necessary!
353		*/
354	       if (prim->end)
355		  (void)elt(copy, start + 0);
356
357	       end(copy, prim->end);
358	    }
359	    else {
360	       /* Wrap
361		*/
362	       assert(split);
363	       end(copy, 0);
364	       j--;
365	    }
366	 }
367	 break;
368
369      case GL_TRIANGLE_FAN:
370      case GL_POLYGON:
371	 j = 2;
372	 while (j != prim->count) {
373	    begin(copy, prim->mode, prim->begin && j == 0);
374
375	    split = elt(copy, start+0);
376	    assert(!split);
377
378	    split = elt(copy, start+j-1);
379	    assert(!split);
380
381	    for (; j != prim->count && !split; j++)
382	       split = elt(copy, start+j);
383
384	    end(copy, prim->end && j == prim->count);
385
386	    if (j != prim->count) {
387	       /* Wrapped the primitive, need to repeat some vertices:
388		*/
389	       j -= 1;
390	    }
391	 }
392	 break;
393
394      default:
395	 (void)split_prim_inplace(prim->mode, &first, &incr);
396
397	 j = 0;
398	 while (j != prim->count) {
399
400	    begin(copy, prim->mode, prim->begin && j == 0);
401
402	    split = 0;
403	    for (k = 0; k < first; k++, j++)
404	       split |= elt(copy, start+j);
405
406	    assert(!split);
407
408	    for (; j != prim->count && !split; )
409	       for (k = 0; k < incr; k++, j++)
410		  split |= elt(copy, start+j);
411
412	    end(copy, prim->end && j == prim->count);
413
414	    if (j != prim->count) {
415	       /* Wrapped the primitive, need to repeat some vertices:
416		*/
417	       assert(j > first - incr);
418	       j -= (first - incr);
419	    }
420	 }
421	 break;
422      }
423   }
424
425   if (copy->dstprim_nr)
426      flush(copy);
427}
428
429
430static void
431replay_init( struct copy_context *copy )
432{
433   struct gl_context *ctx = copy->ctx;
434   GLuint i;
435   GLuint offset;
436   const GLvoid *srcptr;
437
438   /* Make a list of varying attributes and their vbo's.  Also
439    * calculate vertex size.
440    */
441   copy->vertex_size = 0;
442   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
443      struct gl_buffer_object *vbo = copy->array[i]->BufferObj;
444
445      if (copy->array[i]->StrideB == 0) {
446	 copy->dstarray_ptr[i] = copy->array[i];
447      }
448      else {
449	 GLuint j = copy->nr_varying++;
450
451	 copy->varying[j].attr = i;
452	 copy->varying[j].array = copy->array[i];
453	 copy->varying[j].size = attr_size(copy->array[i]);
454	 copy->vertex_size += attr_size(copy->array[i]);
455
456	 if (_mesa_is_bufferobj(vbo) &&
457             !_mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
458	    ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo,
459                                       MAP_INTERNAL);
460
461	 copy->varying[j].src_ptr =
462               ADD_POINTERS(vbo->Mappings[MAP_INTERNAL].Pointer,
463                            copy->array[i]->Ptr);
464
465	 copy->dstarray_ptr[i] = &copy->varying[j].dstarray;
466      }
467   }
468
469   /* There must always be an index buffer.  Currently require the
470    * caller convert non-indexed prims to indexed.  Could alternately
471    * do it internally.
472    */
473   if (_mesa_is_bufferobj(copy->ib->obj) &&
474       !_mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL))
475      ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT,
476				 copy->ib->obj, MAP_INTERNAL);
477
478   srcptr = (const GLubyte *)
479            ADD_POINTERS(copy->ib->obj->Mappings[MAP_INTERNAL].Pointer,
480                         copy->ib->ptr);
481
482   switch (copy->ib->type) {
483   case GL_UNSIGNED_BYTE:
484      copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
485      copy->srcelt = copy->translated_elt_buf;
486
487      for (i = 0; i < copy->ib->count; i++)
488	 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i];
489      break;
490
491   case GL_UNSIGNED_SHORT:
492      copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count);
493      copy->srcelt = copy->translated_elt_buf;
494
495      for (i = 0; i < copy->ib->count; i++)
496	 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i];
497      break;
498
499   case GL_UNSIGNED_INT:
500      copy->translated_elt_buf = NULL;
501      copy->srcelt = (const GLuint *)srcptr;
502      break;
503   }
504
505   /* Figure out the maximum allowed vertex buffer size:
506    */
507   if (copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) {
508      copy->dstbuf_size = copy->limits->max_verts;
509   }
510   else {
511      copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size;
512   }
513
514   /* Allocate an output vertex buffer:
515    *
516    * XXX:  This should be a VBO!
517    */
518   copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size);
519   copy->dstptr = copy->dstbuf;
520
521   /* Setup new vertex arrays to point into the output buffer:
522    */
523   for (offset = 0, i = 0; i < copy->nr_varying; i++) {
524      const struct gl_vertex_array *src = copy->varying[i].array;
525      struct gl_vertex_array *dst = &copy->varying[i].dstarray;
526
527      dst->Size = src->Size;
528      dst->Type = src->Type;
529      dst->Format = GL_RGBA;
530      dst->StrideB = copy->vertex_size;
531      dst->Ptr = copy->dstbuf + offset;
532      dst->Normalized = src->Normalized;
533      dst->Integer = src->Integer;
534      dst->Doubles = src->Doubles;
535      dst->BufferObj = ctx->Shared->NullBufferObj;
536      dst->_ElementSize = src->_ElementSize;
537
538      offset += copy->varying[i].size;
539   }
540
541   /* Allocate an output element list:
542    */
543   copy->dstelt_size = MIN2(65536,
544			    copy->ib->count * 2 + 3);
545   copy->dstelt_size = MIN2(copy->dstelt_size,
546			    copy->limits->max_indices);
547   copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size);
548   copy->dstelt_nr = 0;
549
550   /* Setup the new index buffer to point to the allocated element
551    * list:
552    */
553   copy->dstib.count = 0;	/* duplicates dstelt_nr */
554   copy->dstib.type = GL_UNSIGNED_INT;
555   copy->dstib.obj = ctx->Shared->NullBufferObj;
556   copy->dstib.ptr = copy->dstelt;
557}
558
559
560/**
561 * Free up everything allocated during split/replay.
562 */
563static void
564replay_finish( struct copy_context *copy )
565{
566   struct gl_context *ctx = copy->ctx;
567   GLuint i;
568
569   /* Free our vertex and index buffers:
570    */
571   free(copy->translated_elt_buf);
572   free(copy->dstbuf);
573   free(copy->dstelt);
574
575   /* Unmap VBO's
576    */
577   for (i = 0; i < copy->nr_varying; i++) {
578      struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj;
579      if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo, MAP_INTERNAL))
580	 ctx->Driver.UnmapBuffer(ctx, vbo, MAP_INTERNAL);
581   }
582
583   /* Unmap index buffer:
584    */
585   if (_mesa_is_bufferobj(copy->ib->obj) &&
586       _mesa_bufferobj_mapped(copy->ib->obj, MAP_INTERNAL)) {
587      ctx->Driver.UnmapBuffer(ctx, copy->ib->obj, MAP_INTERNAL);
588   }
589}
590
591
592/**
593 * Split VBO into smaller pieces, draw the pieces.
594 */
595void vbo_split_copy( struct gl_context *ctx,
596		     const struct gl_vertex_array *arrays[],
597		     const struct _mesa_prim *prim,
598		     GLuint nr_prims,
599		     const struct _mesa_index_buffer *ib,
600		     vbo_draw_func draw,
601		     const struct split_limits *limits )
602{
603   struct copy_context copy;
604   GLuint i, this_nr_prims;
605
606   for (i = 0; i < nr_prims;) {
607      /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
608       * will rebase the elements to the basevertex, and we'll only
609       * emit strings of prims with the same basevertex in one draw call.
610       */
611      for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
612	   this_nr_prims++) {
613	 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
614	    break;
615      }
616
617      memset(&copy, 0, sizeof(copy));
618
619      /* Require indexed primitives:
620       */
621      assert(ib);
622
623      copy.ctx = ctx;
624      copy.array = arrays;
625      copy.prim = &prim[i];
626      copy.nr_prims = this_nr_prims;
627      copy.ib = ib;
628      copy.draw = draw;
629      copy.limits = limits;
630
631      /* Clear the vertex cache:
632       */
633      for (i = 0; i < ELT_TABLE_SIZE; i++)
634	 copy.vert_cache[i].in = ~0;
635
636      replay_init(&copy);
637      replay_elts(&copy);
638      replay_finish(&copy);
639   }
640}
641