brw_draw.c revision a2a7e640a4e81c906e42a98602c84757c37ed0b1
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <sys/errno.h> 29 30#include "main/glheader.h" 31#include "main/context.h" 32#include "main/condrender.h" 33#include "main/samplerobj.h" 34#include "main/state.h" 35#include "main/enums.h" 36#include "main/macros.h" 37#include "tnl/tnl.h" 38#include "vbo/vbo_context.h" 39#include "swrast/swrast.h" 40#include "swrast_setup/swrast_setup.h" 41#include "drivers/common/meta.h" 42 43#include "brw_draw.h" 44#include "brw_defines.h" 45#include "brw_context.h" 46#include "brw_state.h" 47 48#include "intel_batchbuffer.h" 49#include "intel_fbo.h" 50#include "intel_mipmap_tree.h" 51#include "intel_regions.h" 52 53#define FILE_DEBUG_FLAG DEBUG_PRIMS 54 55static GLuint prim_to_hw_prim[GL_POLYGON+1] = { 56 _3DPRIM_POINTLIST, 57 _3DPRIM_LINELIST, 58 _3DPRIM_LINELOOP, 59 _3DPRIM_LINESTRIP, 60 _3DPRIM_TRILIST, 61 _3DPRIM_TRISTRIP, 62 _3DPRIM_TRIFAN, 63 _3DPRIM_QUADLIST, 64 _3DPRIM_QUADSTRIP, 65 _3DPRIM_POLYGON 66}; 67 68 69static const GLenum reduced_prim[GL_POLYGON+1] = { 70 GL_POINTS, 71 GL_LINES, 72 GL_LINES, 73 GL_LINES, 74 GL_TRIANGLES, 75 GL_TRIANGLES, 76 GL_TRIANGLES, 77 GL_TRIANGLES, 78 GL_TRIANGLES, 79 GL_TRIANGLES 80}; 81 82 83/* When the primitive changes, set a state bit and re-validate. Not 84 * the nicest and would rather deal with this by having all the 85 * programs be immune to the active primitive (ie. cope with all 86 * possibilities). That may not be realistic however. 87 */ 88static void brw_set_prim(struct brw_context *brw, 89 const struct _mesa_prim *prim) 90{ 91 struct gl_context *ctx = &brw->intel.ctx; 92 uint32_t hw_prim = prim_to_hw_prim[prim->mode]; 93 94 DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); 95 96 /* Slight optimization to avoid the GS program when not needed: 97 */ 98 if (prim->mode == GL_QUAD_STRIP && 99 ctx->Light.ShadeModel != GL_FLAT && 100 ctx->Polygon.FrontMode == GL_FILL && 101 ctx->Polygon.BackMode == GL_FILL) 102 hw_prim = _3DPRIM_TRISTRIP; 103 104 if (prim->mode == GL_QUADS && prim->count == 4 && 105 ctx->Light.ShadeModel != GL_FLAT && 106 ctx->Polygon.FrontMode == GL_FILL && 107 ctx->Polygon.BackMode == GL_FILL) { 108 hw_prim = _3DPRIM_TRIFAN; 109 } 110 111 if (hw_prim != brw->primitive) { 112 brw->primitive = hw_prim; 113 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; 114 115 if (reduced_prim[prim->mode] != brw->intel.reduced_primitive) { 116 brw->intel.reduced_primitive = reduced_prim[prim->mode]; 117 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; 118 } 119 } 120} 121 122static void gen6_set_prim(struct brw_context *brw, 123 const struct _mesa_prim *prim) 124{ 125 uint32_t hw_prim; 126 127 DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); 128 129 hw_prim = prim_to_hw_prim[prim->mode]; 130 131 if (hw_prim != brw->primitive) { 132 brw->primitive = hw_prim; 133 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; 134 } 135} 136 137 138static GLuint trim(GLenum prim, GLuint length) 139{ 140 if (prim == GL_QUAD_STRIP) 141 return length > 3 ? (length - length % 2) : 0; 142 else if (prim == GL_QUADS) 143 return length - length % 4; 144 else 145 return length; 146} 147 148 149static void brw_emit_prim(struct brw_context *brw, 150 const struct _mesa_prim *prim, 151 uint32_t hw_prim) 152{ 153 struct intel_context *intel = &brw->intel; 154 int verts_per_instance; 155 int vertex_access_type; 156 int start_vertex_location; 157 int base_vertex_location; 158 159 DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 160 prim->start, prim->count); 161 162 start_vertex_location = prim->start; 163 base_vertex_location = prim->basevertex; 164 if (prim->indexed) { 165 vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; 166 start_vertex_location += brw->ib.start_vertex_offset; 167 base_vertex_location += brw->vb.start_vertex_bias; 168 } else { 169 vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; 170 start_vertex_location += brw->vb.start_vertex_bias; 171 } 172 173 verts_per_instance = trim(prim->mode, prim->count); 174 175 /* If nothing to emit, just return. */ 176 if (verts_per_instance == 0) 177 return; 178 179 /* If we're set to always flush, do it before and after the primitive emit. 180 * We want to catch both missed flushes that hurt instruction/state cache 181 * and missed flushes of the render cache as it heads to other parts of 182 * the besides the draw code. 183 */ 184 if (intel->always_flush_cache) { 185 intel_batchbuffer_emit_mi_flush(intel); 186 } 187 188 BEGIN_BATCH(6); 189 OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | 190 hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | 191 vertex_access_type); 192 OUT_BATCH(verts_per_instance); 193 OUT_BATCH(start_vertex_location); 194 OUT_BATCH(prim->num_instances); 195 OUT_BATCH(0); // start instance location 196 OUT_BATCH(base_vertex_location); 197 ADVANCE_BATCH(); 198 199 intel->batch.need_workaround_flush = true; 200 201 if (intel->always_flush_cache) { 202 intel_batchbuffer_emit_mi_flush(intel); 203 } 204} 205 206static void gen7_emit_prim(struct brw_context *brw, 207 const struct _mesa_prim *prim, 208 uint32_t hw_prim) 209{ 210 struct intel_context *intel = &brw->intel; 211 int verts_per_instance; 212 int vertex_access_type; 213 int start_vertex_location; 214 int base_vertex_location; 215 216 DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 217 prim->start, prim->count); 218 219 start_vertex_location = prim->start; 220 base_vertex_location = prim->basevertex; 221 if (prim->indexed) { 222 vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; 223 start_vertex_location += brw->ib.start_vertex_offset; 224 base_vertex_location += brw->vb.start_vertex_bias; 225 } else { 226 vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; 227 start_vertex_location += brw->vb.start_vertex_bias; 228 } 229 230 verts_per_instance = trim(prim->mode, prim->count); 231 232 /* If nothing to emit, just return. */ 233 if (verts_per_instance == 0) 234 return; 235 236 /* If we're set to always flush, do it before and after the primitive emit. 237 * We want to catch both missed flushes that hurt instruction/state cache 238 * and missed flushes of the render cache as it heads to other parts of 239 * the besides the draw code. 240 */ 241 if (intel->always_flush_cache) { 242 intel_batchbuffer_emit_mi_flush(intel); 243 } 244 245 BEGIN_BATCH(7); 246 OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); 247 OUT_BATCH(hw_prim | vertex_access_type); 248 OUT_BATCH(verts_per_instance); 249 OUT_BATCH(start_vertex_location); 250 OUT_BATCH(prim->num_instances); 251 OUT_BATCH(0); // start instance location 252 OUT_BATCH(base_vertex_location); 253 ADVANCE_BATCH(); 254 255 if (intel->always_flush_cache) { 256 intel_batchbuffer_emit_mi_flush(intel); 257 } 258} 259 260 261static void brw_merge_inputs( struct brw_context *brw, 262 const struct gl_client_array *arrays[]) 263{ 264 struct brw_vertex_info old = brw->vb.info; 265 GLuint i; 266 267 for (i = 0; i < brw->vb.nr_buffers; i++) { 268 drm_intel_bo_unreference(brw->vb.buffers[i].bo); 269 brw->vb.buffers[i].bo = NULL; 270 } 271 brw->vb.nr_buffers = 0; 272 273 memset(&brw->vb.info, 0, sizeof(brw->vb.info)); 274 275 for (i = 0; i < VERT_ATTRIB_MAX; i++) { 276 brw->vb.inputs[i].buffer = -1; 277 brw->vb.inputs[i].glarray = arrays[i]; 278 brw->vb.inputs[i].attrib = (gl_vert_attrib) i; 279 280 if (arrays[i]->StrideB != 0) 281 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << 282 ((i%16) * 2); 283 } 284 285 /* Raise statechanges if input sizes have changed. */ 286 if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) 287 brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; 288} 289 290/* 291 * \brief Resolve buffers before drawing. 292 * 293 * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each 294 * enabled depth texture. 295 * 296 * (In the future, this will also perform MSAA resolves). 297 */ 298static void 299brw_predraw_resolve_buffers(struct brw_context *brw) 300{ 301 struct gl_context *ctx = &brw->intel.ctx; 302 struct intel_context *intel = &brw->intel; 303 struct intel_renderbuffer *depth_irb; 304 struct intel_texture_object *tex_obj; 305 306 /* Resolve the depth buffer's HiZ buffer. */ 307 depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH); 308 if (depth_irb) 309 intel_renderbuffer_resolve_hiz(intel, depth_irb); 310 311 /* Resolve depth buffer of each enabled depth texture. */ 312 for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) { 313 if (!ctx->Texture.Unit[i]._ReallyEnabled) 314 continue; 315 tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current); 316 if (!tex_obj || !tex_obj->mt) 317 continue; 318 intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt); 319 } 320} 321 322/** 323 * \brief Call this after drawing to mark which buffers need resolving 324 * 325 * If the depth buffer was written to and if it has an accompanying HiZ 326 * buffer, then mark that it needs a depth resolve. 327 * 328 * If the color buffer is a multisample window system buffer, then 329 * mark that it needs a downsample. 330 */ 331static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) 332{ 333 struct intel_context *intel = &brw->intel; 334 struct gl_context *ctx = &brw->intel.ctx; 335 struct gl_framebuffer *fb = ctx->DrawBuffer; 336 337 struct intel_renderbuffer *front_irb = NULL; 338 struct intel_renderbuffer *back_irb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT); 339 struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH); 340 341 if (intel->is_front_buffer_rendering) 342 front_irb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT); 343 344 if (front_irb) 345 intel_renderbuffer_set_needs_downsample(front_irb); 346 if (back_irb) 347 intel_renderbuffer_set_needs_downsample(back_irb); 348 if (depth_irb && ctx->Depth.Mask) 349 intel_renderbuffer_set_needs_depth_resolve(depth_irb); 350} 351 352static int 353verts_per_prim(GLenum mode) 354{ 355 switch (mode) { 356 case GL_POINTS: 357 return 1; 358 case GL_LINE_STRIP: 359 case GL_LINE_LOOP: 360 case GL_LINES: 361 return 2; 362 case GL_TRIANGLE_STRIP: 363 case GL_TRIANGLE_FAN: 364 case GL_POLYGON: 365 case GL_TRIANGLES: 366 case GL_QUADS: 367 case GL_QUAD_STRIP: 368 return 3; 369 default: 370 _mesa_problem(NULL, 371 "unknown prim type in transform feedback primitive count"); 372 return 0; 373 } 374} 375 376/** 377 * Update internal counters based on the the drawing operation described in 378 * prim. 379 */ 380static void 381brw_update_primitive_count(struct brw_context *brw, 382 const struct _mesa_prim *prim) 383{ 384 uint32_t count = count_tessellated_primitives(prim); 385 brw->sol.primitives_generated += count; 386 if (brw->intel.ctx.TransformFeedback.CurrentObject->Active && 387 !brw->intel.ctx.TransformFeedback.CurrentObject->Paused) { 388 /* Update brw->sol.svbi_0_max_index to reflect the amount by which the 389 * hardware is going to increment SVBI 0 when this drawing operation 390 * occurs. This is necessary because the kernel does not (yet) save and 391 * restore GPU registers when context switching, so we'll need to be 392 * able to reload SVBI 0 with the correct value in case we have to start 393 * a new batch buffer. 394 */ 395 unsigned verts = verts_per_prim(prim->mode); 396 uint32_t space_avail = 397 (brw->sol.svbi_0_max_index - brw->sol.svbi_0_starting_index) / verts; 398 uint32_t primitives_written = MIN2 (space_avail, count); 399 brw->sol.svbi_0_starting_index += verts * primitives_written; 400 401 /* And update the TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN query. */ 402 brw->sol.primitives_written += primitives_written; 403 } 404} 405 406/* May fail if out of video memory for texture or vbo upload, or on 407 * fallback conditions. 408 */ 409static bool brw_try_draw_prims( struct gl_context *ctx, 410 const struct gl_client_array *arrays[], 411 const struct _mesa_prim *prim, 412 GLuint nr_prims, 413 const struct _mesa_index_buffer *ib, 414 GLuint min_index, 415 GLuint max_index ) 416{ 417 struct intel_context *intel = intel_context(ctx); 418 struct brw_context *brw = brw_context(ctx); 419 bool retval = true; 420 GLuint i; 421 bool fail_next = false; 422 423 if (ctx->NewState) 424 _mesa_update_state( ctx ); 425 426 /* We have to validate the textures *before* checking for fallbacks; 427 * otherwise, the software fallback won't be able to rely on the 428 * texture state, the firstLevel and lastLevel fields won't be 429 * set in the intel texture object (they'll both be 0), and the 430 * software fallback will segfault if it attempts to access any 431 * texture level other than level 0. 432 */ 433 brw_validate_textures( brw ); 434 435 /* Resolves must occur after updating state and finalizing textures but 436 * before setting up any hardware state for this draw call. 437 */ 438 brw_predraw_resolve_buffers(brw); 439 440 /* Bind all inputs, derive varying and size information: 441 */ 442 brw_merge_inputs( brw, arrays ); 443 444 brw->ib.ib = ib; 445 brw->state.dirty.brw |= BRW_NEW_INDICES; 446 447 brw->vb.min_index = min_index; 448 brw->vb.max_index = max_index; 449 brw->state.dirty.brw |= BRW_NEW_VERTICES; 450 451 /* Have to validate state quite late. Will rebuild tnl_program, 452 * which depends on varying information. 453 * 454 * Note this is where brw->vs->prog_data.inputs_read is calculated, 455 * so can't access it earlier. 456 */ 457 458 intel_prepare_render(intel); 459 460 for (i = 0; i < nr_prims; i++) { 461 int estimated_max_prim_size; 462 463 estimated_max_prim_size = 512; /* batchbuffer commands */ 464 estimated_max_prim_size += (BRW_MAX_TEX_UNIT * 465 (sizeof(struct brw_sampler_state) + 466 sizeof(struct gen5_sampler_default_color))); 467 estimated_max_prim_size += 1024; /* gen6 VS push constants */ 468 estimated_max_prim_size += 1024; /* gen6 WM push constants */ 469 estimated_max_prim_size += 512; /* misc. pad */ 470 471 /* Flush the batch if it's approaching full, so that we don't wrap while 472 * we've got validated state that needs to be in the same batch as the 473 * primitives. 474 */ 475 intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); 476 intel_batchbuffer_save_state(intel); 477 478 brw->num_instances = prim->num_instances; 479 if (intel->gen < 6) 480 brw_set_prim(brw, &prim[i]); 481 else 482 gen6_set_prim(brw, &prim[i]); 483 484retry: 485 /* Note that before the loop, brw->state.dirty.brw was set to != 0, and 486 * that the state updated in the loop outside of this block is that in 487 * *_set_prim or intel_batchbuffer_flush(), which only impacts 488 * brw->state.dirty.brw. 489 */ 490 if (brw->state.dirty.brw) { 491 intel->no_batch_wrap = true; 492 brw_upload_state(brw); 493 494 if (unlikely(brw->intel.Fallback)) { 495 intel->no_batch_wrap = false; 496 retval = false; 497 goto out; 498 } 499 } 500 501 if (intel->gen >= 7) 502 gen7_emit_prim(brw, &prim[i], brw->primitive); 503 else 504 brw_emit_prim(brw, &prim[i], brw->primitive); 505 506 intel->no_batch_wrap = false; 507 508 if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) { 509 if (!fail_next) { 510 intel_batchbuffer_reset_to_saved(intel); 511 intel_batchbuffer_flush(intel); 512 fail_next = true; 513 goto retry; 514 } else { 515 if (intel_batchbuffer_flush(intel) == -ENOSPC) { 516 static bool warned = false; 517 518 if (!warned) { 519 fprintf(stderr, "i965: Single primitive emit exceeded" 520 "available aperture space\n"); 521 warned = true; 522 } 523 524 retval = false; 525 } 526 } 527 } 528 529 if (!_mesa_meta_in_progress(ctx)) 530 brw_update_primitive_count(brw, &prim[i]); 531 } 532 533 if (intel->always_flush_batch) 534 intel_batchbuffer_flush(intel); 535 out: 536 537 brw_state_cache_check_size(brw); 538 brw_postdraw_set_buffers_need_resolve(brw); 539 540 return retval; 541} 542 543void brw_draw_prims( struct gl_context *ctx, 544 const struct _mesa_prim *prim, 545 GLuint nr_prims, 546 const struct _mesa_index_buffer *ib, 547 GLboolean index_bounds_valid, 548 GLuint min_index, 549 GLuint max_index, 550 struct gl_transform_feedback_object *tfb_vertcount ) 551{ 552 const struct gl_client_array **arrays = ctx->Array._DrawArrays; 553 bool retval; 554 555 if (!_mesa_check_conditional_render(ctx)) 556 return; 557 558 /* Handle primitive restart if needed */ 559 if (brw_handle_primitive_restart(ctx, prim, nr_prims, ib)) { 560 /* The draw was handled, so we can exit now */ 561 return; 562 } 563 564 if (!vbo_all_varyings_in_vbos(arrays)) { 565 if (!index_bounds_valid) 566 vbo_get_minmax_indices(ctx, prim, ib, &min_index, &max_index, nr_prims); 567 568 /* Decide if we want to rebase. If so we end up recursing once 569 * only into this function. 570 */ 571 if (min_index != 0 && !vbo_any_varyings_in_vbos(arrays)) { 572 vbo_rebase_prims(ctx, arrays, 573 prim, nr_prims, 574 ib, min_index, max_index, 575 brw_draw_prims ); 576 return; 577 } 578 } 579 580 /* Make a first attempt at drawing: 581 */ 582 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 583 584 /* Otherwise, we really are out of memory. Pass the drawing 585 * command to the software tnl module and which will in turn call 586 * swrast to do the drawing. 587 */ 588 if (!retval) { 589 _swsetup_Wakeup(ctx); 590 _tnl_wakeup(ctx); 591 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 592 } 593 594} 595 596void brw_draw_init( struct brw_context *brw ) 597{ 598 struct gl_context *ctx = &brw->intel.ctx; 599 struct vbo_context *vbo = vbo_context(ctx); 600 int i; 601 602 /* Register our drawing function: 603 */ 604 vbo->draw_prims = brw_draw_prims; 605 606 for (i = 0; i < VERT_ATTRIB_MAX; i++) 607 brw->vb.inputs[i].buffer = -1; 608 brw->vb.nr_buffers = 0; 609 brw->vb.nr_enabled = 0; 610} 611 612void brw_draw_destroy( struct brw_context *brw ) 613{ 614 int i; 615 616 for (i = 0; i < brw->vb.nr_buffers; i++) { 617 drm_intel_bo_unreference(brw->vb.buffers[i].bo); 618 brw->vb.buffers[i].bo = NULL; 619 } 620 brw->vb.nr_buffers = 0; 621 622 for (i = 0; i < brw->vb.nr_enabled; i++) { 623 brw->vb.enabled[i]->buffer = -1; 624 } 625 brw->vb.nr_enabled = 0; 626 627 drm_intel_bo_unreference(brw->ib.bo); 628 brw->ib.bo = NULL; 629} 630