/* r300_render.c revision 76034aaf655134c71e1ec619085c46251d037720 */
1/* 2 * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 22 23/* r300_render: Vertex and index buffer primitive emission. Contains both 24 * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. 
*/ 25 26#include "draw/draw_context.h" 27#include "draw/draw_vbuf.h" 28 29#include "util/u_inlines.h" 30 31#include "util/u_format.h" 32#include "util/u_memory.h" 33#include "util/u_upload_mgr.h" 34#include "util/u_prim.h" 35 36#include "r300_cs.h" 37#include "r300_context.h" 38#include "r300_screen_buffer.h" 39#include "r300_emit.h" 40#include "r300_reg.h" 41#include "r300_state_derived.h" 42 43static uint32_t r300_translate_primitive(unsigned prim) 44{ 45 switch (prim) { 46 case PIPE_PRIM_POINTS: 47 return R300_VAP_VF_CNTL__PRIM_POINTS; 48 case PIPE_PRIM_LINES: 49 return R300_VAP_VF_CNTL__PRIM_LINES; 50 case PIPE_PRIM_LINE_LOOP: 51 return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; 52 case PIPE_PRIM_LINE_STRIP: 53 return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; 54 case PIPE_PRIM_TRIANGLES: 55 return R300_VAP_VF_CNTL__PRIM_TRIANGLES; 56 case PIPE_PRIM_TRIANGLE_STRIP: 57 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; 58 case PIPE_PRIM_TRIANGLE_FAN: 59 return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; 60 case PIPE_PRIM_QUADS: 61 return R300_VAP_VF_CNTL__PRIM_QUADS; 62 case PIPE_PRIM_QUAD_STRIP: 63 return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; 64 case PIPE_PRIM_POLYGON: 65 return R300_VAP_VF_CNTL__PRIM_POLYGON; 66 default: 67 return 0; 68 } 69} 70 71static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300, 72 unsigned mode) 73{ 74 struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state; 75 uint32_t color_control = rs->color_control; 76 77 /* By default (see r300_state.c:r300_create_rs_state) color_control is 78 * initialized to provoking the first vertex. 79 * 80 * Triangle fans must be reduced to the second vertex, not the first, in 81 * Gallium flatshade-first mode, as per the GL spec. 82 * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt) 83 * 84 * Quads never provoke correctly in flatshade-first mode. 
The first 85 * vertex is never considered as provoking, so only the second, third, 86 * and fourth vertices can be selected, and both "third" and "last" modes 87 * select the fourth vertex. This is probably due to D3D lacking quads. 88 * 89 * Similarly, polygons reduce to the first, not the last, vertex, when in 90 * "last" mode, and all other modes start from the second vertex. 91 * 92 * ~ C. 93 */ 94 95 if (rs->rs.flatshade_first) { 96 switch (mode) { 97 case PIPE_PRIM_TRIANGLE_FAN: 98 color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND; 99 break; 100 case PIPE_PRIM_QUADS: 101 case PIPE_PRIM_QUAD_STRIP: 102 case PIPE_PRIM_POLYGON: 103 color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; 104 break; 105 default: 106 color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST; 107 break; 108 } 109 } else { 110 color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST; 111 } 112 113 return color_control; 114} 115 116static void r500_emit_index_offset(struct r300_context *r300, int index_bias) 117{ 118 CS_LOCALS(r300); 119 120 if (r300->screen->caps.is_r500 && 121 r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { 122 BEGIN_CS(2); 123 OUT_CS_REG(R500_VAP_INDEX_OFFSET, 124 (index_bias & 0xFFFFFF) | (index_bias < 0 ? 1<<24 : 0)); 125 END_CS; 126 } else { 127 if (index_bias) { 128 fprintf(stderr, "r300: Non-zero index bias is unsupported " 129 "on this hardware.\n"); 130 assert(0); 131 } 132 } 133} 134 135enum r300_prepare_flags { 136 PREP_FIRST_DRAW = (1 << 0), 137 PREP_VALIDATE_VBOS = (1 << 1), 138 PREP_EMIT_AOS = (1 << 2), 139 PREP_EMIT_AOS_SWTCL = (1 << 3), 140 PREP_INDEXED = (1 << 4) 141}; 142 143/* Check if the requested number of dwords is available in the CS and 144 * if not, flush. Then validate buffers and emit dirty state. 145 * Return TRUE if flush occured. 
*/ 146static void r300_prepare_for_rendering(struct r300_context *r300, 147 enum r300_prepare_flags flags, 148 struct pipe_resource *index_buffer, 149 unsigned cs_dwords, 150 unsigned aos_offset, 151 int index_bias, 152 unsigned *end_cs_dwords) 153{ 154 boolean flushed = FALSE; 155 boolean first_draw = flags & PREP_FIRST_DRAW; 156 boolean emit_aos = flags & PREP_EMIT_AOS; 157 boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL; 158 unsigned end_dwords = 0; 159 160 /* Add dirty state, index offset, and AOS. */ 161 if (first_draw) { 162 cs_dwords += r300_get_num_dirty_dwords(r300); 163 164 if (r300->screen->caps.is_r500) 165 cs_dwords += 2; /* emit_index_offset */ 166 167 if (emit_aos) 168 cs_dwords += 55; /* emit_aos */ 169 170 if (emit_aos_swtcl) 171 cs_dwords += 7; /* emit_aos_swtcl */ 172 } 173 174 /* Emitted in flush. */ 175 end_dwords += 26; /* emit_query_end */ 176 177 cs_dwords += end_dwords; 178 179 /* Reserve requested CS space. */ 180 if (!r300_check_cs(r300, cs_dwords)) { 181 r300->context.flush(&r300->context, 0, NULL); 182 flushed = TRUE; 183 } 184 185 /* Validate buffers and emit dirty state if needed. 
*/ 186 if (first_draw || flushed) { 187 r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS, index_buffer); 188 r300_emit_dirty_state(r300); 189 r500_emit_index_offset(r300, index_bias); 190 if (emit_aos) 191 r300_emit_aos(r300, aos_offset, flags & PREP_INDEXED); 192 if (emit_aos_swtcl) 193 r300_emit_aos_swtcl(r300, flags & PREP_INDEXED); 194 } 195 196 if (end_cs_dwords) 197 *end_cs_dwords = end_dwords; 198} 199 200static boolean immd_is_good_idea(struct r300_context *r300, 201 unsigned count) 202{ 203 struct pipe_vertex_element* velem; 204 struct pipe_vertex_buffer* vbuf; 205 boolean checked[PIPE_MAX_ATTRIBS] = {0}; 206 unsigned vertex_element_count = r300->velems->count; 207 unsigned i, vbi; 208 209 if (DBG_ON(r300, DBG_NO_IMMD)) { 210 return FALSE; 211 } 212 213 if (r300->draw) { 214 return FALSE; 215 } 216 217 if (count > 10) { 218 return FALSE; 219 } 220 221 /* We shouldn't map buffers referenced by CS, busy buffers, 222 * and ones placed in VRAM. */ 223 /* XXX Check for VRAM buffers. */ 224 for (i = 0; i < vertex_element_count; i++) { 225 velem = &r300->velems->velem[i]; 226 vbi = velem->vertex_buffer_index; 227 228 if (!checked[vbi]) { 229 vbuf = &r300->vertex_buffer[vbi]; 230 231 if (r300_buffer_is_referenced(&r300->context, 232 vbuf->buffer, 233 R300_REF_CS | R300_REF_HW)) { 234 /* It's a very bad idea to map it... */ 235 return FALSE; 236 } 237 checked[vbi] = TRUE; 238 } 239 } 240 return TRUE; 241} 242 243/***************************************************************************** 244 * The emission of draw packets for r500. Older GPUs may use these functions * 245 * after resolving fallback issues (e.g. stencil ref two-sided). 
* 246 ****************************************************************************/ 247 248static void r300_emit_draw_arrays_immediate(struct r300_context *r300, 249 unsigned mode, 250 unsigned start, 251 unsigned count) 252{ 253 struct pipe_vertex_element* velem; 254 struct pipe_vertex_buffer* vbuf; 255 unsigned vertex_element_count = r300->velems->count; 256 unsigned i, v, vbi, dw, elem_offset, dwords; 257 258 /* Size of the vertex, in dwords. */ 259 unsigned vertex_size = 0; 260 261 /* Offsets of the attribute, in dwords, from the start of the vertex. */ 262 unsigned offset[PIPE_MAX_ATTRIBS]; 263 264 /* Size of the vertex element, in dwords. */ 265 unsigned size[PIPE_MAX_ATTRIBS]; 266 267 /* Stride to the same attrib in the next vertex in the vertex buffer, 268 * in dwords. */ 269 unsigned stride[PIPE_MAX_ATTRIBS] = {0}; 270 271 /* Mapped vertex buffers. */ 272 uint32_t* map[PIPE_MAX_ATTRIBS] = {0}; 273 struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL}; 274 275 CS_LOCALS(r300); 276 277 /* Calculate the vertex size, offsets, strides etc. and map the buffers. */ 278 for (i = 0; i < vertex_element_count; i++) { 279 velem = &r300->velems->velem[i]; 280 offset[i] = velem->src_offset / 4; 281 size[i] = util_format_get_blocksize(velem->src_format) / 4; 282 vertex_size += size[i]; 283 vbi = velem->vertex_buffer_index; 284 285 /* Map the buffer. 
*/ 286 if (!map[vbi]) { 287 vbuf = &r300->vertex_buffer[vbi]; 288 map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context, 289 vbuf->buffer, 290 PIPE_TRANSFER_READ, 291 &transfer[vbi]); 292 map[vbi] += vbuf->buffer_offset / 4; 293 stride[vbi] = vbuf->stride / 4; 294 } 295 } 296 297 dwords = 9 + count * vertex_size; 298 299 r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL); 300 301 BEGIN_CS(dwords); 302 OUT_CS_REG(R300_GA_COLOR_CONTROL, 303 r300_provoking_vertex_fixes(r300, mode)); 304 OUT_CS_REG(R300_VAP_VTX_SIZE, vertex_size); 305 OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); 306 OUT_CS(count - 1); 307 OUT_CS(0); 308 OUT_CS_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size); 309 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) | 310 r300_translate_primitive(mode)); 311 312 /* Emit vertices. */ 313 for (v = 0; v < count; v++) { 314 for (i = 0; i < vertex_element_count; i++) { 315 velem = &r300->velems->velem[i]; 316 vbi = velem->vertex_buffer_index; 317 elem_offset = offset[i] + stride[vbi] * (v + start); 318 319 for (dw = 0; dw < size[i]; dw++) { 320 OUT_CS(map[vbi][elem_offset + dw]); 321 } 322 } 323 } 324 END_CS; 325 326 /* Unmap buffers. */ 327 for (i = 0; i < vertex_element_count; i++) { 328 vbi = r300->velems->velem[i].vertex_buffer_index; 329 330 if (map[vbi]) { 331 vbuf = &r300->vertex_buffer[vbi]; 332 pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]); 333 map[vbi] = NULL; 334 } 335 } 336} 337 338static void r300_emit_draw_arrays(struct r300_context *r300, 339 unsigned mode, 340 unsigned count) 341{ 342 boolean alt_num_verts = count > 65535; 343 CS_LOCALS(r300); 344 345 if (count >= (1 << 24)) { 346 fprintf(stderr, "r300: Got a huge number of vertices: %i, " 347 "refusing to render.\n", count); 348 return; 349 } 350 351 BEGIN_CS(7 + (alt_num_verts ? 
2 : 0)); 352 if (alt_num_verts) { 353 OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); 354 } 355 OUT_CS_REG(R300_GA_COLOR_CONTROL, 356 r300_provoking_vertex_fixes(r300, mode)); 357 OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); 358 OUT_CS(count - 1); 359 OUT_CS(0); 360 OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); 361 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | 362 r300_translate_primitive(mode) | 363 (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); 364 END_CS; 365} 366 367static void r300_emit_draw_elements(struct r300_context *r300, 368 struct pipe_resource* indexBuffer, 369 unsigned indexSize, 370 unsigned minIndex, 371 unsigned maxIndex, 372 unsigned mode, 373 unsigned start, 374 unsigned count) 375{ 376 uint32_t count_dwords; 377 uint32_t offset_dwords = indexSize * start / sizeof(uint32_t); 378 boolean alt_num_verts = count > 65535; 379 CS_LOCALS(r300); 380 381 if (count >= (1 << 24)) { 382 fprintf(stderr, "r300: Got a huge number of vertices: %i, " 383 "refusing to render.\n", count); 384 return; 385 } 386 387 maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index); 388 389 DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n", 390 count, minIndex, maxIndex); 391 392 BEGIN_CS(13 + (alt_num_verts ? 2 : 0)); 393 if (alt_num_verts) { 394 OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count); 395 } 396 OUT_CS_REG(R300_GA_COLOR_CONTROL, 397 r300_provoking_vertex_fixes(r300, mode)); 398 OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2); 399 OUT_CS(maxIndex); 400 OUT_CS(minIndex); 401 OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); 402 if (indexSize == 4) { 403 count_dwords = count; 404 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | 405 R300_VAP_VF_CNTL__INDEX_SIZE_32bit | 406 r300_translate_primitive(mode) | 407 (alt_num_verts ? 
R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); 408 } else { 409 count_dwords = (count + 1) / 2; 410 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | 411 r300_translate_primitive(mode) | 412 (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0)); 413 } 414 415 /* INDX_BUFFER is a truly special packet3. 416 * Unlike most other packet3, where the offset is after the count, 417 * the order is reversed, so the relocation ends up carrying the 418 * size of the indexbuf instead of the offset. 419 */ 420 OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); 421 OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | 422 (0 << R300_INDX_BUFFER_SKIP_SHIFT)); 423 OUT_CS(offset_dwords << 2); 424 OUT_CS_BUF_RELOC(indexBuffer, count_dwords, 425 RADEON_GEM_DOMAIN_GTT, 0, 0); 426 427 END_CS; 428} 429 430static void r300_shorten_ubyte_elts(struct r300_context* r300, 431 struct pipe_resource** elts, 432 unsigned start, 433 unsigned count) 434{ 435 struct pipe_context* context = &r300->context; 436 struct pipe_screen* screen = r300->context.screen; 437 struct pipe_resource* new_elts; 438 unsigned char *in_map; 439 unsigned short *out_map; 440 struct pipe_transfer *src_transfer, *dst_transfer; 441 unsigned i; 442 443 new_elts = pipe_buffer_create(screen, 444 PIPE_BIND_INDEX_BUFFER, 445 2 * count); 446 447 in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer); 448 out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer); 449 450 in_map += start; 451 452 for (i = 0; i < count; i++) { 453 *out_map = (unsigned short)*in_map; 454 in_map++; 455 out_map++; 456 } 457 458 pipe_buffer_unmap(context, *elts, src_transfer); 459 pipe_buffer_unmap(context, new_elts, dst_transfer); 460 461 *elts = new_elts; 462} 463 464static void r300_align_ushort_elts(struct r300_context *r300, 465 struct pipe_resource **elts, 466 unsigned start, unsigned count) 467{ 468 struct pipe_context* context = &r300->context; 469 struct pipe_transfer *in_transfer = 
NULL; 470 struct pipe_transfer *out_transfer = NULL; 471 struct pipe_resource* new_elts; 472 unsigned short *in_map; 473 unsigned short *out_map; 474 475 new_elts = pipe_buffer_create(context->screen, 476 PIPE_BIND_INDEX_BUFFER, 477 2 * count); 478 479 in_map = pipe_buffer_map(context, *elts, 480 PIPE_TRANSFER_READ, &in_transfer); 481 out_map = pipe_buffer_map(context, new_elts, 482 PIPE_TRANSFER_WRITE, &out_transfer); 483 484 memcpy(out_map, in_map+start, 2 * count); 485 486 pipe_buffer_unmap(context, *elts, in_transfer); 487 pipe_buffer_unmap(context, new_elts, out_transfer); 488 489 *elts = new_elts; 490} 491 492/* This is the fast-path drawing & emission for HW TCL. */ 493static void r300_draw_range_elements(struct pipe_context* pipe, 494 struct pipe_resource* indexBuffer, 495 unsigned indexSize, 496 int indexBias, 497 unsigned minIndex, 498 unsigned maxIndex, 499 unsigned mode, 500 unsigned start, 501 unsigned count) 502{ 503 struct r300_context* r300 = r300_context(pipe); 504 struct pipe_resource* orgIndexBuffer = indexBuffer; 505 boolean alt_num_verts = r300->screen->caps.is_r500 && 506 count > 65536 && 507 r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); 508 unsigned short_count; 509 510 if (r300->skip_rendering) { 511 return; 512 } 513 514 if (!u_trim_pipe_prim(mode, &count)) { 515 return; 516 } 517 518 if (indexSize == 1) { 519 r300_shorten_ubyte_elts(r300, &indexBuffer, start, count); 520 indexSize = 2; 521 start = 0; 522 } else if (indexSize == 2 && start % 2 != 0) { 523 r300_align_ushort_elts(r300, &indexBuffer, start, count); 524 start = 0; 525 } 526 527 r300_update_derived_state(r300); 528 r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count); 529 530 /* 15 dwords for emit_draw_elements */ 531 r300_prepare_for_rendering(r300, 532 PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, 533 indexBuffer, 15, 0, indexBias, NULL); 534 535 u_upload_flush(r300->upload_vb); 536 u_upload_flush(r300->upload_ib); 537 if 
(alt_num_verts || count <= 65535) { 538 r300_emit_draw_elements(r300, indexBuffer, indexSize, 539 minIndex, maxIndex, mode, start, count); 540 } else { 541 do { 542 short_count = MIN2(count, 65534); 543 r300_emit_draw_elements(r300, indexBuffer, indexSize, 544 minIndex, maxIndex, 545 mode, start, short_count); 546 547 start += short_count; 548 count -= short_count; 549 550 /* 15 dwords for emit_draw_elements */ 551 if (count) { 552 r300_prepare_for_rendering(r300, 553 PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED, 554 indexBuffer, 15, 0, indexBias, NULL); 555 } 556 } while (count); 557 } 558 559 if (indexBuffer != orgIndexBuffer) { 560 pipe_resource_reference( &indexBuffer, NULL ); 561 } 562} 563 564/* Simple helpers for context setup. Should probably be moved to util. */ 565static void r300_draw_elements(struct pipe_context* pipe, 566 struct pipe_resource* indexBuffer, 567 unsigned indexSize, int indexBias, unsigned mode, 568 unsigned start, unsigned count) 569{ 570 struct r300_context *r300 = r300_context(pipe); 571 572 pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, 573 0, r300->vertex_buffer_max_index, 574 mode, start, count); 575} 576 577static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode, 578 unsigned start, unsigned count) 579{ 580 struct r300_context* r300 = r300_context(pipe); 581 boolean alt_num_verts = r300->screen->caps.is_r500 && 582 count > 65536 && 583 r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0); 584 unsigned short_count; 585 586 if (r300->skip_rendering) { 587 return; 588 } 589 590 if (!u_trim_pipe_prim(mode, &count)) { 591 return; 592 } 593 594 r300_update_derived_state(r300); 595 596 if (immd_is_good_idea(r300, count)) { 597 r300_emit_draw_arrays_immediate(r300, mode, start, count); 598 } else { 599 /* 9 spare dwords for emit_draw_arrays. 
*/ 600 r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS, 601 NULL, 9, start, 0, NULL); 602 603 if (alt_num_verts || count <= 65535) { 604 r300_emit_draw_arrays(r300, mode, count); 605 } else { 606 do { 607 short_count = MIN2(count, 65535); 608 r300_emit_draw_arrays(r300, mode, short_count); 609 610 start += short_count; 611 count -= short_count; 612 613 /* 9 spare dwords for emit_draw_arrays. */ 614 if (count) { 615 r300_prepare_for_rendering(r300, 616 PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9, 617 start, 0, NULL); 618 } 619 } while (count); 620 } 621 u_upload_flush(r300->upload_vb); 622 } 623} 624 625/**************************************************************************** 626 * The rest of this file is for SW TCL rendering only. Please be polite and * 627 * keep these functions separated so that they are easier to locate. ~C. * 628 ***************************************************************************/ 629 630/* SW TCL arrays, using Draw. */ 631static void r300_swtcl_draw_arrays(struct pipe_context* pipe, 632 unsigned mode, 633 unsigned start, 634 unsigned count) 635{ 636 struct r300_context* r300 = r300_context(pipe); 637 struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; 638 int i; 639 640 if (r300->skip_rendering) { 641 return; 642 } 643 644 if (!u_trim_pipe_prim(mode, &count)) { 645 return; 646 } 647 648 r300_update_derived_state(r300); 649 650 for (i = 0; i < r300->vertex_buffer_count; i++) { 651 void* buf = pipe_buffer_map(pipe, 652 r300->vertex_buffer[i].buffer, 653 PIPE_TRANSFER_READ, 654 &vb_transfer[i]); 655 draw_set_mapped_vertex_buffer(r300->draw, i, buf); 656 } 657 658 draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL); 659 660 draw_arrays(r300->draw, mode, start, count); 661 662 /* XXX Not sure whether this is the best fix. 663 * It prevents CS from being rejected and weird assertion failures. 
*/ 664 draw_flush(r300->draw); 665 666 for (i = 0; i < r300->vertex_buffer_count; i++) { 667 pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, 668 vb_transfer[i]); 669 draw_set_mapped_vertex_buffer(r300->draw, i, NULL); 670 } 671} 672 673/* SW TCL elements, using Draw. */ 674static void r300_swtcl_draw_range_elements(struct pipe_context* pipe, 675 struct pipe_resource* indexBuffer, 676 unsigned indexSize, 677 int indexBias, 678 unsigned minIndex, 679 unsigned maxIndex, 680 unsigned mode, 681 unsigned start, 682 unsigned count) 683{ 684 struct r300_context* r300 = r300_context(pipe); 685 struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS]; 686 struct pipe_transfer *ib_transfer; 687 int i; 688 void* indices; 689 690 if (r300->skip_rendering) { 691 return; 692 } 693 694 if (!u_trim_pipe_prim(mode, &count)) { 695 return; 696 } 697 698 r300_update_derived_state(r300); 699 700 for (i = 0; i < r300->vertex_buffer_count; i++) { 701 void* buf = pipe_buffer_map(pipe, 702 r300->vertex_buffer[i].buffer, 703 PIPE_TRANSFER_READ, 704 &vb_transfer[i]); 705 draw_set_mapped_vertex_buffer(r300->draw, i, buf); 706 } 707 708 indices = pipe_buffer_map(pipe, indexBuffer, 709 PIPE_TRANSFER_READ, &ib_transfer); 710 draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias, 711 minIndex, maxIndex, indices); 712 713 draw_arrays(r300->draw, mode, start, count); 714 715 /* XXX Not sure whether this is the best fix. 716 * It prevents CS from being rejected and weird assertion failures. */ 717 draw_flush(r300->draw); 718 719 for (i = 0; i < r300->vertex_buffer_count; i++) { 720 pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer, 721 vb_transfer[i]); 722 draw_set_mapped_vertex_buffer(r300->draw, i, NULL); 723 } 724 725 pipe_buffer_unmap(pipe, indexBuffer, 726 ib_transfer); 727 draw_set_mapped_element_buffer_range(r300->draw, 0, 0, 728 start, start + count - 1, 729 NULL); 730} 731 732/* Object for rendering using Draw. 
*/ 733struct r300_render { 734 /* Parent class */ 735 struct vbuf_render base; 736 737 /* Pipe context */ 738 struct r300_context* r300; 739 740 /* Vertex information */ 741 size_t vertex_size; 742 unsigned prim; 743 unsigned hwprim; 744 745 /* VBO */ 746 struct pipe_resource* vbo; 747 size_t vbo_size; 748 size_t vbo_offset; 749 size_t vbo_max_used; 750 void * vbo_ptr; 751 752 struct pipe_transfer *vbo_transfer; 753}; 754 755static INLINE struct r300_render* 756r300_render(struct vbuf_render* render) 757{ 758 return (struct r300_render*)render; 759} 760 761static const struct vertex_info* 762r300_render_get_vertex_info(struct vbuf_render* render) 763{ 764 struct r300_render* r300render = r300_render(render); 765 struct r300_context* r300 = r300render->r300; 766 767 return &r300->vertex_info; 768} 769 770static boolean r300_render_allocate_vertices(struct vbuf_render* render, 771 ushort vertex_size, 772 ushort count) 773{ 774 struct r300_render* r300render = r300_render(render); 775 struct r300_context* r300 = r300render->r300; 776 struct pipe_screen* screen = r300->context.screen; 777 size_t size = (size_t)vertex_size * (size_t)count; 778 779 if (size + r300render->vbo_offset > r300render->vbo_size) 780 { 781 pipe_resource_reference(&r300->vbo, NULL); 782 r300render->vbo = pipe_buffer_create(screen, 783 PIPE_BIND_VERTEX_BUFFER, 784 R300_MAX_DRAW_VBO_SIZE); 785 r300render->vbo_offset = 0; 786 r300render->vbo_size = R300_MAX_DRAW_VBO_SIZE; 787 } 788 789 r300render->vertex_size = vertex_size; 790 r300->vbo = r300render->vbo; 791 r300->vbo_offset = r300render->vbo_offset; 792 793 return (r300render->vbo) ? 
TRUE : FALSE; 794} 795 796static void* r300_render_map_vertices(struct vbuf_render* render) 797{ 798 struct r300_render* r300render = r300_render(render); 799 800 assert(!r300render->vbo_transfer); 801 802 r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context, 803 r300render->vbo, 804 PIPE_TRANSFER_WRITE, 805 &r300render->vbo_transfer); 806 807 return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset); 808} 809 810static void r300_render_unmap_vertices(struct vbuf_render* render, 811 ushort min, 812 ushort max) 813{ 814 struct r300_render* r300render = r300_render(render); 815 struct pipe_context* context = &r300render->r300->context; 816 817 assert(r300render->vbo_transfer); 818 819 r300render->vbo_max_used = MAX2(r300render->vbo_max_used, 820 r300render->vertex_size * (max + 1)); 821 pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer); 822 823 r300render->vbo_transfer = NULL; 824} 825 826static void r300_render_release_vertices(struct vbuf_render* render) 827{ 828 struct r300_render* r300render = r300_render(render); 829 830 r300render->vbo_offset += r300render->vbo_max_used; 831 r300render->vbo_max_used = 0; 832} 833 834static boolean r300_render_set_primitive(struct vbuf_render* render, 835 unsigned prim) 836{ 837 struct r300_render* r300render = r300_render(render); 838 839 r300render->prim = prim; 840 r300render->hwprim = r300_translate_primitive(prim); 841 842 return TRUE; 843} 844 845static void r300_render_draw_arrays(struct vbuf_render* render, 846 unsigned start, 847 unsigned count) 848{ 849 struct r300_render* r300render = r300_render(render); 850 struct r300_context* r300 = r300render->r300; 851 uint8_t* ptr; 852 unsigned i; 853 unsigned dwords = 6; 854 855 CS_LOCALS(r300); 856 857 (void) i; (void) ptr; 858 859 r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL, 860 NULL, dwords, 0, 0, NULL); 861 862 DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count); 863 864 /* Uncomment to dump all 
VBOs rendered through this interface. 865 * Slow and noisy! 866 ptr = pipe_buffer_map(&r300render->r300->context, 867 r300render->vbo, PIPE_TRANSFER_READ, 868 &r300render->vbo_transfer); 869 870 for (i = 0; i < count; i++) { 871 printf("r300: Vertex %d\n", i); 872 draw_dump_emitted_vertex(&r300->vertex_info, ptr); 873 ptr += r300->vertex_info.size * 4; 874 printf("\n"); 875 } 876 877 pipe_buffer_unmap(&r300render->r300->context, r300render->vbo, 878 r300render->vbo_transfer); 879 */ 880 881 BEGIN_CS(dwords); 882 OUT_CS_REG(R300_GA_COLOR_CONTROL, 883 r300_provoking_vertex_fixes(r300, r300render->prim)); 884 OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1); 885 OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); 886 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | 887 r300render->hwprim); 888 END_CS; 889} 890 891static void r300_render_draw_elements(struct vbuf_render* render, 892 const ushort* indices, 893 uint count) 894{ 895 struct r300_render* r300render = r300_render(render); 896 struct r300_context* r300 = r300render->r300; 897 int i; 898 unsigned end_cs_dwords; 899 unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) / 900 (r300render->r300->vertex_info.size * 4) - 1; 901 unsigned short_count; 902 struct r300_cs_info cs_info; 903 904 CS_LOCALS(r300); 905 906 /* Reserve at least 256 dwords. 907 * 908 * Below we manage the CS space manually because there may be more 909 * indices than it can fit in CS. 
*/ 910 r300_prepare_for_rendering(r300, 911 PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED, 912 NULL, 256, 0, 0, &end_cs_dwords); 913 914 while (count) { 915 r300->rws->get_cs_info(r300->rws, &cs_info); 916 917 short_count = MIN2(count, (cs_info.free - end_cs_dwords - 6) * 2); 918 919 BEGIN_CS(6 + (short_count+1)/2); 920 OUT_CS_REG(R300_GA_COLOR_CONTROL, 921 r300_provoking_vertex_fixes(r300, r300render->prim)); 922 OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index); 923 OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2); 924 OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) | 925 r300render->hwprim); 926 for (i = 0; i < short_count-1; i += 2) { 927 OUT_CS(indices[i+1] << 16 | indices[i]); 928 } 929 if (short_count % 2) { 930 OUT_CS(indices[short_count-1]); 931 } 932 END_CS; 933 934 /* OK now subtract the emitted indices and see if we need to emit 935 * another draw packet. */ 936 indices += short_count; 937 count -= short_count; 938 939 if (count) { 940 r300_prepare_for_rendering(r300, 941 PREP_EMIT_AOS_SWTCL | PREP_INDEXED, 942 NULL, 256, 0, 0, &end_cs_dwords); 943 } 944 } 945} 946 947static void r300_render_destroy(struct vbuf_render* render) 948{ 949 FREE(render); 950} 951 952static struct vbuf_render* r300_render_create(struct r300_context* r300) 953{ 954 struct r300_render* r300render = CALLOC_STRUCT(r300_render); 955 956 r300render->r300 = r300; 957 958 /* XXX find real numbers plz */ 959 r300render->base.max_vertex_buffer_bytes = 128 * 1024; 960 r300render->base.max_indices = 16 * 1024; 961 962 r300render->base.get_vertex_info = r300_render_get_vertex_info; 963 r300render->base.allocate_vertices = r300_render_allocate_vertices; 964 r300render->base.map_vertices = r300_render_map_vertices; 965 r300render->base.unmap_vertices = r300_render_unmap_vertices; 966 r300render->base.set_primitive = r300_render_set_primitive; 967 r300render->base.draw_elements = r300_render_draw_elements; 968 r300render->base.draw_arrays = 
r300_render_draw_arrays; 969 r300render->base.release_vertices = r300_render_release_vertices; 970 r300render->base.destroy = r300_render_destroy; 971 972 r300render->vbo = NULL; 973 r300render->vbo_size = 0; 974 r300render->vbo_offset = 0; 975 976 return &r300render->base; 977} 978 979struct draw_stage* r300_draw_stage(struct r300_context* r300) 980{ 981 struct vbuf_render* render; 982 struct draw_stage* stage; 983 984 render = r300_render_create(r300); 985 986 if (!render) { 987 return NULL; 988 } 989 990 stage = draw_vbuf_stage(r300->draw, render); 991 992 if (!stage) { 993 render->destroy(render); 994 return NULL; 995 } 996 997 draw_set_render(r300->draw, render); 998 999 return stage; 1000} 1001 1002/**************************************************************************** 1003 * Two-sided stencil reference value fallback. It's designed to be as much 1004 * separate from rest of the driver as possible. 1005 ***************************************************************************/ 1006 1007struct r300_stencilref_context { 1008 void (*draw_arrays)(struct pipe_context *pipe, 1009 unsigned mode, unsigned start, unsigned count); 1010 1011 void (*draw_range_elements)( 1012 struct pipe_context *pipe, struct pipe_resource *indexBuffer, 1013 unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, 1014 unsigned mode, unsigned start, unsigned count); 1015 1016 uint32_t rs_cull_mode; 1017 uint32_t zb_stencilrefmask; 1018 ubyte ref_value_front; 1019}; 1020 1021static boolean r300_stencilref_needed(struct r300_context *r300) 1022{ 1023 struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; 1024 1025 return dsa->two_sided_stencil_ref || 1026 (dsa->two_sided && 1027 r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]); 1028} 1029 1030/* Set drawing for front faces. 
*/ 1031static void r300_stencilref_begin(struct r300_context *r300) 1032{ 1033 struct r300_stencilref_context *sr = r300->stencilref_fallback; 1034 struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; 1035 struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; 1036 1037 /* Save state. */ 1038 sr->rs_cull_mode = rs->cull_mode; 1039 sr->zb_stencilrefmask = dsa->stencil_ref_mask; 1040 sr->ref_value_front = r300->stencil_ref.ref_value[0]; 1041 1042 /* We *cull* pixels, therefore no need to mask out the bits. */ 1043 rs->cull_mode |= R300_CULL_BACK; 1044 1045 r300->rs_state.dirty = TRUE; 1046} 1047 1048/* Set drawing for back faces. */ 1049static void r300_stencilref_switch_side(struct r300_context *r300) 1050{ 1051 struct r300_stencilref_context *sr = r300->stencilref_fallback; 1052 struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; 1053 struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; 1054 1055 rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT; 1056 dsa->stencil_ref_mask = dsa->stencil_ref_bf; 1057 r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1]; 1058 1059 r300->rs_state.dirty = TRUE; 1060 r300->dsa_state.dirty = TRUE; 1061} 1062 1063/* Restore the original state. */ 1064static void r300_stencilref_end(struct r300_context *r300) 1065{ 1066 struct r300_stencilref_context *sr = r300->stencilref_fallback; 1067 struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state; 1068 struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state; 1069 1070 /* Restore state. 
*/ 1071 rs->cull_mode = sr->rs_cull_mode; 1072 dsa->stencil_ref_mask = sr->zb_stencilrefmask; 1073 r300->stencil_ref.ref_value[0] = sr->ref_value_front; 1074 1075 r300->rs_state.dirty = TRUE; 1076 r300->dsa_state.dirty = TRUE; 1077} 1078 1079static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode, 1080 unsigned start, unsigned count) 1081{ 1082 struct r300_context *r300 = r300_context(pipe); 1083 struct r300_stencilref_context *sr = r300->stencilref_fallback; 1084 1085 if (!r300_stencilref_needed(r300)) { 1086 sr->draw_arrays(pipe, mode, start, count); 1087 } else { 1088 r300_stencilref_begin(r300); 1089 sr->draw_arrays(pipe, mode, start, count); 1090 r300_stencilref_switch_side(r300); 1091 sr->draw_arrays(pipe, mode, start, count); 1092 r300_stencilref_end(r300); 1093 } 1094} 1095 1096static void r300_stencilref_draw_range_elements( 1097 struct pipe_context *pipe, struct pipe_resource *indexBuffer, 1098 unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex, 1099 unsigned mode, unsigned start, unsigned count) 1100{ 1101 struct r300_context *r300 = r300_context(pipe); 1102 struct r300_stencilref_context *sr = r300->stencilref_fallback; 1103 1104 if (!r300_stencilref_needed(r300)) { 1105 sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, 1106 minIndex, maxIndex, mode, start, count); 1107 } else { 1108 r300_stencilref_begin(r300); 1109 sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, 1110 minIndex, maxIndex, mode, start, count); 1111 r300_stencilref_switch_side(r300); 1112 sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias, 1113 minIndex, maxIndex, mode, start, count); 1114 r300_stencilref_end(r300); 1115 } 1116} 1117 1118static void r300_plug_in_stencil_ref_fallback(struct r300_context *r300) 1119{ 1120 r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context); 1121 1122 /* Save original draw functions. 
*/ 1123 r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays; 1124 r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements; 1125 1126 /* Override the draw functions. */ 1127 r300->context.draw_arrays = r300_stencilref_draw_arrays; 1128 r300->context.draw_range_elements = r300_stencilref_draw_range_elements; 1129} 1130 1131void r300_init_render_functions(struct r300_context *r300) 1132{ 1133 /* Set generic functions. */ 1134 r300->context.draw_elements = r300_draw_elements; 1135 1136 /* Set draw functions based on presence of HW TCL. */ 1137 if (r300->screen->caps.has_tcl) { 1138 r300->context.draw_arrays = r300_draw_arrays; 1139 r300->context.draw_range_elements = r300_draw_range_elements; 1140 } else { 1141 r300->context.draw_arrays = r300_swtcl_draw_arrays; 1142 r300->context.draw_range_elements = r300_swtcl_draw_range_elements; 1143 } 1144 1145 /* Plug in two-sided stencil reference value fallback if needed. */ 1146 if (!r300->screen->caps.is_r500) 1147 r300_plug_in_stencil_ref_fallback(r300); 1148} 1149