/* vbo_split_copy.c revision c5e473fbe25b20cb27aac44ff6e269701abd33a8 */
1 2/* 3 * Mesa 3-D graphics library 4 * Version: 6.5 5 * 6 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included 16 * in all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 * Authors: 26 * Keith Whitwell <keith@tungstengraphics.com> 27 */ 28 29/* Split indexed primitives with per-vertex copying. 30 */ 31 32#include "main/glheader.h" 33#include "main/bufferobj.h" 34#include "main/imports.h" 35#include "main/image.h" 36#include "main/macros.h" 37#include "main/mtypes.h" 38 39#include "vbo_split.h" 40#include "vbo.h" 41 42 43#define ELT_TABLE_SIZE 16 44 45/** 46 * Used for vertex-level splitting of indexed buffers. Note that 47 * non-indexed primitives may be converted to indexed in some cases 48 * (eg loops, fans) in order to use this splitting path. 
49 */ 50struct copy_context { 51 52 struct gl_context *ctx; 53 const struct gl_client_array **array; 54 const struct _mesa_prim *prim; 55 GLuint nr_prims; 56 const struct _mesa_index_buffer *ib; 57 vbo_draw_func draw; 58 59 const struct split_limits *limits; 60 61 struct { 62 GLuint attr; 63 GLuint size; 64 const struct gl_client_array *array; 65 const GLubyte *src_ptr; 66 67 struct gl_client_array dstarray; 68 69 } varying[VERT_ATTRIB_MAX]; 70 GLuint nr_varying; 71 72 const struct gl_client_array *dstarray_ptr[VERT_ATTRIB_MAX]; 73 struct _mesa_index_buffer dstib; 74 75 GLuint *translated_elt_buf; 76 const GLuint *srcelt; 77 78 /** A baby hash table to avoid re-emitting (some) duplicate 79 * vertices when splitting indexed primitives. 80 */ 81 struct { 82 GLuint in; 83 GLuint out; 84 } vert_cache[ELT_TABLE_SIZE]; 85 86 GLuint vertex_size; 87 GLubyte *dstbuf; 88 GLubyte *dstptr; /**< dstptr == dstbuf + dstelt_max * vertsize */ 89 GLuint dstbuf_size; /**< in vertices */ 90 GLuint dstbuf_nr; /**< count of emitted vertices, also the largest value 91 * in dstelt. Our MaxIndex. 
92 */ 93 94 GLuint *dstelt; 95 GLuint dstelt_nr; 96 GLuint dstelt_size; 97 98#define MAX_PRIM 32 99 struct _mesa_prim dstprim[MAX_PRIM]; 100 GLuint dstprim_nr; 101 102}; 103 104 105static GLuint attr_size( const struct gl_client_array *array ) 106{ 107 return array->Size * _mesa_sizeof_type(array->Type); 108} 109 110 111/** 112 * Starts returning true slightly before the buffer fills, to ensure 113 * that there is sufficient room for any remaining vertices to finish 114 * off the prim: 115 */ 116static GLboolean 117check_flush( struct copy_context *copy ) 118{ 119 GLenum mode = copy->dstprim[copy->dstprim_nr].mode; 120 121 if (GL_TRIANGLE_STRIP == mode && 122 copy->dstelt_nr & 1) { /* see bug9962 */ 123 return GL_FALSE; 124 } 125 126 if (copy->dstbuf_nr + 4 > copy->dstbuf_size) 127 return GL_TRUE; 128 129 if (copy->dstelt_nr + 4 > copy->dstelt_size) 130 return GL_TRUE; 131 132 return GL_FALSE; 133} 134 135 136/** 137 * Dump the parameters/info for a vbo->draw() call. 138 */ 139static void 140dump_draw_info(struct gl_context *ctx, 141 const struct gl_client_array **arrays, 142 const struct _mesa_prim *prims, 143 GLuint nr_prims, 144 const struct _mesa_index_buffer *ib, 145 GLuint min_index, 146 GLuint max_index) 147{ 148 GLuint i, j; 149 150 printf("VBO Draw:\n"); 151 for (i = 0; i < nr_prims; i++) { 152 printf("Prim %u of %u\n", i, nr_prims); 153 printf(" Prim mode 0x%x\n", prims[i].mode); 154 printf(" IB: %p\n", (void*) ib); 155 for (j = 0; j < VERT_ATTRIB_MAX; j++) { 156 printf(" array %d at %p:\n", j, (void*) arrays[j]); 157 printf(" enabled %d, ptr %p, size %d, type 0x%x, stride %d\n", 158 arrays[j]->Enabled, arrays[j]->Ptr, 159 arrays[j]->Size, arrays[j]->Type, arrays[j]->StrideB); 160 if (0) { 161 GLint k = prims[i].start + prims[i].count - 1; 162 GLfloat *last = (GLfloat *) (arrays[j]->Ptr + arrays[j]->Stride * k); 163 printf(" last: %f %f %f\n", 164 last[0], last[1], last[2]); 165 } 166 } 167 } 168} 169 170 171static void 172flush( struct copy_context *copy 
) 173{ 174 struct gl_context *ctx = copy->ctx; 175 const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays; 176 GLuint i; 177 178 /* Set some counters: 179 */ 180 copy->dstib.count = copy->dstelt_nr; 181 182#if 0 183 dump_draw_info(copy->ctx, 184 copy->dstarray_ptr, 185 copy->dstprim, 186 copy->dstprim_nr, 187 ©->dstib, 188 0, 189 copy->dstbuf_nr); 190#else 191 (void) dump_draw_info; 192#endif 193 194 ctx->Array._DrawArrays = copy->dstarray_ptr; 195 ctx->NewDriverState |= ctx->DriverFlags.NewArray; 196 197 copy->draw( ctx, 198 copy->dstprim, 199 copy->dstprim_nr, 200 ©->dstib, 201 GL_TRUE, 202 0, 203 copy->dstbuf_nr - 1, 204 NULL ); 205 206 ctx->Array._DrawArrays = saved_arrays; 207 ctx->NewDriverState |= ctx->DriverFlags.NewArray; 208 209 /* Reset all pointers: 210 */ 211 copy->dstprim_nr = 0; 212 copy->dstelt_nr = 0; 213 copy->dstbuf_nr = 0; 214 copy->dstptr = copy->dstbuf; 215 216 /* Clear the vertex cache: 217 */ 218 for (i = 0; i < ELT_TABLE_SIZE; i++) 219 copy->vert_cache[i].in = ~0; 220} 221 222 223/** 224 * Called at begin of each primitive during replay. 225 */ 226static void 227begin( struct copy_context *copy, GLenum mode, GLboolean begin_flag ) 228{ 229 struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr]; 230 231 prim->mode = mode; 232 prim->begin = begin_flag; 233 prim->num_instances = 1; 234} 235 236 237/** 238 * Use a hashtable to attempt to identify recently-emitted vertices 239 * and avoid re-emitting them. 240 */ 241static GLuint 242elt(struct copy_context *copy, GLuint elt_idx) 243{ 244 GLuint elt = copy->srcelt[elt_idx]; 245 GLuint slot = elt & (ELT_TABLE_SIZE-1); 246 247/* printf("elt %d\n", elt); */ 248 249 /* Look up the incoming element in the vertex cache. Re-emit if 250 * necessary. 
251 */ 252 if (copy->vert_cache[slot].in != elt) { 253 GLubyte *csr = copy->dstptr; 254 GLuint i; 255 256/* printf(" --> emit to dstelt %d\n", copy->dstbuf_nr); */ 257 258 for (i = 0; i < copy->nr_varying; i++) { 259 const struct gl_client_array *srcarray = copy->varying[i].array; 260 const GLubyte *srcptr = copy->varying[i].src_ptr + elt * srcarray->StrideB; 261 262 memcpy(csr, srcptr, copy->varying[i].size); 263 csr += copy->varying[i].size; 264 265#ifdef NAN_CHECK 266 if (srcarray->Type == GL_FLOAT) { 267 GLuint k; 268 GLfloat *f = (GLfloat *) srcptr; 269 for (k = 0; k < srcarray->Size; k++) { 270 assert(!IS_INF_OR_NAN(f[k])); 271 assert(f[k] <= 1.0e20 && f[k] >= -1.0e20); 272 } 273 } 274#endif 275 276 if (0) 277 { 278 const GLuint *f = (const GLuint *)srcptr; 279 GLuint j; 280 printf(" varying %d: ", i); 281 for(j = 0; j < copy->varying[i].size / 4; j++) 282 printf("%x ", f[j]); 283 printf("\n"); 284 } 285 } 286 287 copy->vert_cache[slot].in = elt; 288 copy->vert_cache[slot].out = copy->dstbuf_nr++; 289 copy->dstptr += copy->vertex_size; 290 291 assert(csr == copy->dstptr); 292 assert(copy->dstptr == (copy->dstbuf + 293 copy->dstbuf_nr * copy->vertex_size)); 294 } 295/* else */ 296/* printf(" --> reuse vertex\n"); */ 297 298/* printf(" --> emit %d\n", copy->vert_cache[slot].out); */ 299 copy->dstelt[copy->dstelt_nr++] = copy->vert_cache[slot].out; 300 return check_flush(copy); 301} 302 303 304/** 305 * Called at end of each primitive during replay. 
306 */ 307static void 308end( struct copy_context *copy, GLboolean end_flag ) 309{ 310 struct _mesa_prim *prim = ©->dstprim[copy->dstprim_nr]; 311 312/* printf("end (%d)\n", end_flag); */ 313 314 prim->end = end_flag; 315 prim->count = copy->dstelt_nr - prim->start; 316 317 if (++copy->dstprim_nr == MAX_PRIM || 318 check_flush(copy)) 319 flush(copy); 320} 321 322 323static void 324replay_elts( struct copy_context *copy ) 325{ 326 GLuint i, j, k; 327 GLboolean split; 328 329 for (i = 0; i < copy->nr_prims; i++) { 330 const struct _mesa_prim *prim = ©->prim[i]; 331 const GLuint start = prim->start; 332 GLuint first, incr; 333 334 switch (prim->mode) { 335 336 case GL_LINE_LOOP: 337 /* Convert to linestrip and emit the final vertex explicitly, 338 * but only in the resultant strip that requires it. 339 */ 340 j = 0; 341 while (j != prim->count) { 342 begin(copy, GL_LINE_STRIP, prim->begin && j == 0); 343 344 for (split = GL_FALSE; j != prim->count && !split; j++) 345 split = elt(copy, start + j); 346 347 if (j == prim->count) { 348 /* Done, emit final line. Split doesn't matter as 349 * it is always raised a bit early so we can emit 350 * the last verts if necessary! 
351 */ 352 if (prim->end) 353 (void)elt(copy, start + 0); 354 355 end(copy, prim->end); 356 } 357 else { 358 /* Wrap 359 */ 360 assert(split); 361 end(copy, 0); 362 j--; 363 } 364 } 365 break; 366 367 case GL_TRIANGLE_FAN: 368 case GL_POLYGON: 369 j = 2; 370 while (j != prim->count) { 371 begin(copy, prim->mode, prim->begin && j == 0); 372 373 split = elt(copy, start+0); 374 assert(!split); 375 376 split = elt(copy, start+j-1); 377 assert(!split); 378 379 for (; j != prim->count && !split; j++) 380 split = elt(copy, start+j); 381 382 end(copy, prim->end && j == prim->count); 383 384 if (j != prim->count) { 385 /* Wrapped the primitive, need to repeat some vertices: 386 */ 387 j -= 1; 388 } 389 } 390 break; 391 392 default: 393 (void)split_prim_inplace(prim->mode, &first, &incr); 394 395 j = 0; 396 while (j != prim->count) { 397 398 begin(copy, prim->mode, prim->begin && j == 0); 399 400 split = 0; 401 for (k = 0; k < first; k++, j++) 402 split |= elt(copy, start+j); 403 404 assert(!split); 405 406 for (; j != prim->count && !split; ) 407 for (k = 0; k < incr; k++, j++) 408 split |= elt(copy, start+j); 409 410 end(copy, prim->end && j == prim->count); 411 412 if (j != prim->count) { 413 /* Wrapped the primitive, need to repeat some vertices: 414 */ 415 assert(j > first - incr); 416 j -= (first - incr); 417 } 418 } 419 break; 420 } 421 } 422 423 if (copy->dstprim_nr) 424 flush(copy); 425} 426 427 428static void 429replay_init( struct copy_context *copy ) 430{ 431 struct gl_context *ctx = copy->ctx; 432 GLuint i; 433 GLuint offset; 434 const GLvoid *srcptr; 435 436 /* Make a list of varying attributes and their vbo's. Also 437 * calculate vertex size. 
438 */ 439 copy->vertex_size = 0; 440 for (i = 0; i < VERT_ATTRIB_MAX; i++) { 441 struct gl_buffer_object *vbo = copy->array[i]->BufferObj; 442 443 if (copy->array[i]->StrideB == 0) { 444 copy->dstarray_ptr[i] = copy->array[i]; 445 } 446 else { 447 GLuint j = copy->nr_varying++; 448 449 copy->varying[j].attr = i; 450 copy->varying[j].array = copy->array[i]; 451 copy->varying[j].size = attr_size(copy->array[i]); 452 copy->vertex_size += attr_size(copy->array[i]); 453 454 if (_mesa_is_bufferobj(vbo) && !_mesa_bufferobj_mapped(vbo)) 455 ctx->Driver.MapBufferRange(ctx, 0, vbo->Size, GL_MAP_READ_BIT, vbo); 456 457 copy->varying[j].src_ptr = ADD_POINTERS(vbo->Pointer, 458 copy->array[i]->Ptr); 459 460 copy->dstarray_ptr[i] = ©->varying[j].dstarray; 461 } 462 } 463 464 /* There must always be an index buffer. Currently require the 465 * caller convert non-indexed prims to indexed. Could alternately 466 * do it internally. 467 */ 468 if (_mesa_is_bufferobj(copy->ib->obj) && 469 !_mesa_bufferobj_mapped(copy->ib->obj)) 470 ctx->Driver.MapBufferRange(ctx, 0, copy->ib->obj->Size, GL_MAP_READ_BIT, 471 copy->ib->obj); 472 473 srcptr = (const GLubyte *) ADD_POINTERS(copy->ib->obj->Pointer, 474 copy->ib->ptr); 475 476 switch (copy->ib->type) { 477 case GL_UNSIGNED_BYTE: 478 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count); 479 copy->srcelt = copy->translated_elt_buf; 480 481 for (i = 0; i < copy->ib->count; i++) 482 copy->translated_elt_buf[i] = ((const GLubyte *)srcptr)[i]; 483 break; 484 485 case GL_UNSIGNED_SHORT: 486 copy->translated_elt_buf = malloc(sizeof(GLuint) * copy->ib->count); 487 copy->srcelt = copy->translated_elt_buf; 488 489 for (i = 0; i < copy->ib->count; i++) 490 copy->translated_elt_buf[i] = ((const GLushort *)srcptr)[i]; 491 break; 492 493 case GL_UNSIGNED_INT: 494 copy->translated_elt_buf = NULL; 495 copy->srcelt = (const GLuint *)srcptr; 496 break; 497 } 498 499 /* Figure out the maximum allowed vertex buffer size: 500 */ 501 if 
(copy->vertex_size * copy->limits->max_verts <= copy->limits->max_vb_size) { 502 copy->dstbuf_size = copy->limits->max_verts; 503 } 504 else { 505 copy->dstbuf_size = copy->limits->max_vb_size / copy->vertex_size; 506 } 507 508 /* Allocate an output vertex buffer: 509 * 510 * XXX: This should be a VBO! 511 */ 512 copy->dstbuf = malloc(copy->dstbuf_size * copy->vertex_size); 513 copy->dstptr = copy->dstbuf; 514 515 /* Setup new vertex arrays to point into the output buffer: 516 */ 517 for (offset = 0, i = 0; i < copy->nr_varying; i++) { 518 const struct gl_client_array *src = copy->varying[i].array; 519 struct gl_client_array *dst = ©->varying[i].dstarray; 520 521 dst->Size = src->Size; 522 dst->Type = src->Type; 523 dst->Format = GL_RGBA; 524 dst->Stride = copy->vertex_size; 525 dst->StrideB = copy->vertex_size; 526 dst->Ptr = copy->dstbuf + offset; 527 dst->Enabled = GL_TRUE; 528 dst->Normalized = src->Normalized; 529 dst->Integer = src->Integer; 530 dst->BufferObj = ctx->Shared->NullBufferObj; 531 dst->_ElementSize = src->_ElementSize; 532 dst->_MaxElement = copy->dstbuf_size; /* may be less! */ 533 534 offset += copy->varying[i].size; 535 } 536 537 /* Allocate an output element list: 538 */ 539 copy->dstelt_size = MIN2(65536, 540 copy->ib->count * 2 + 3); 541 copy->dstelt_size = MIN2(copy->dstelt_size, 542 copy->limits->max_indices); 543 copy->dstelt = malloc(sizeof(GLuint) * copy->dstelt_size); 544 copy->dstelt_nr = 0; 545 546 /* Setup the new index buffer to point to the allocated element 547 * list: 548 */ 549 copy->dstib.count = 0; /* duplicates dstelt_nr */ 550 copy->dstib.type = GL_UNSIGNED_INT; 551 copy->dstib.obj = ctx->Shared->NullBufferObj; 552 copy->dstib.ptr = copy->dstelt; 553} 554 555 556/** 557 * Free up everything allocated during split/replay. 
558 */ 559static void 560replay_finish( struct copy_context *copy ) 561{ 562 struct gl_context *ctx = copy->ctx; 563 GLuint i; 564 565 /* Free our vertex and index buffers: 566 */ 567 free(copy->translated_elt_buf); 568 free(copy->dstbuf); 569 free(copy->dstelt); 570 571 /* Unmap VBO's 572 */ 573 for (i = 0; i < copy->nr_varying; i++) { 574 struct gl_buffer_object *vbo = copy->varying[i].array->BufferObj; 575 if (_mesa_is_bufferobj(vbo) && _mesa_bufferobj_mapped(vbo)) 576 ctx->Driver.UnmapBuffer(ctx, vbo); 577 } 578 579 /* Unmap index buffer: 580 */ 581 if (_mesa_is_bufferobj(copy->ib->obj) && 582 _mesa_bufferobj_mapped(copy->ib->obj)) { 583 ctx->Driver.UnmapBuffer(ctx, copy->ib->obj); 584 } 585} 586 587 588/** 589 * Split VBO into smaller pieces, draw the pieces. 590 */ 591void vbo_split_copy( struct gl_context *ctx, 592 const struct gl_client_array *arrays[], 593 const struct _mesa_prim *prim, 594 GLuint nr_prims, 595 const struct _mesa_index_buffer *ib, 596 vbo_draw_func draw, 597 const struct split_limits *limits ) 598{ 599 struct copy_context copy; 600 GLuint i, this_nr_prims; 601 602 for (i = 0; i < nr_prims;) { 603 /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices 604 * will rebase the elements to the basevertex, and we'll only 605 * emit strings of prims with the same basevertex in one draw call. 606 */ 607 for (this_nr_prims = 1; i + this_nr_prims < nr_prims; 608 this_nr_prims++) { 609 if (prim[i].basevertex != prim[i + this_nr_prims].basevertex) 610 break; 611 } 612 613 memset(©, 0, sizeof(copy)); 614 615 /* Require indexed primitives: 616 */ 617 assert(ib); 618 619 copy.ctx = ctx; 620 copy.array = arrays; 621 copy.prim = &prim[i]; 622 copy.nr_prims = this_nr_prims; 623 copy.ib = ib; 624 copy.draw = draw; 625 copy.limits = limits; 626 627 /* Clear the vertex cache: 628 */ 629 for (i = 0; i < ELT_TABLE_SIZE; i++) 630 copy.vert_cache[i].in = ~0; 631 632 replay_init(©); 633 replay_elts(©); 634 replay_finish(©); 635 } 636} 637