/* brw_draw.c revision 2e5a1a254ed81b1d3efa6064f48183eefac784d0 */
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28 29#include "main/glheader.h" 30#include "main/context.h" 31#include "main/condrender.h" 32#include "main/samplerobj.h" 33#include "main/state.h" 34#include "main/enums.h" 35#include "tnl/tnl.h" 36#include "vbo/vbo_context.h" 37#include "swrast/swrast.h" 38#include "swrast_setup/swrast_setup.h" 39 40#include "brw_draw.h" 41#include "brw_defines.h" 42#include "brw_context.h" 43#include "brw_state.h" 44 45#include "intel_batchbuffer.h" 46 47#define FILE_DEBUG_FLAG DEBUG_PRIMS 48 49static GLuint prim_to_hw_prim[GL_POLYGON+1] = { 50 _3DPRIM_POINTLIST, 51 _3DPRIM_LINELIST, 52 _3DPRIM_LINELOOP, 53 _3DPRIM_LINESTRIP, 54 _3DPRIM_TRILIST, 55 _3DPRIM_TRISTRIP, 56 _3DPRIM_TRIFAN, 57 _3DPRIM_QUADLIST, 58 _3DPRIM_QUADSTRIP, 59 _3DPRIM_POLYGON 60}; 61 62 63static const GLenum reduced_prim[GL_POLYGON+1] = { 64 GL_POINTS, 65 GL_LINES, 66 GL_LINES, 67 GL_LINES, 68 GL_TRIANGLES, 69 GL_TRIANGLES, 70 GL_TRIANGLES, 71 GL_TRIANGLES, 72 GL_TRIANGLES, 73 GL_TRIANGLES 74}; 75 76 77/* When the primitive changes, set a state bit and re-validate. Not 78 * the nicest and would rather deal with this by having all the 79 * programs be immune to the active primitive (ie. cope with all 80 * possibilities). That may not be realistic however. 
81 */ 82static void brw_set_prim(struct brw_context *brw, 83 const struct _mesa_prim *prim) 84{ 85 struct gl_context *ctx = &brw->intel.ctx; 86 uint32_t hw_prim = prim_to_hw_prim[prim->mode]; 87 88 DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); 89 90 /* Slight optimization to avoid the GS program when not needed: 91 */ 92 if (prim->mode == GL_QUAD_STRIP && 93 ctx->Light.ShadeModel != GL_FLAT && 94 ctx->Polygon.FrontMode == GL_FILL && 95 ctx->Polygon.BackMode == GL_FILL) 96 hw_prim = _3DPRIM_TRISTRIP; 97 98 if (prim->mode == GL_QUADS && prim->count == 4 && 99 ctx->Light.ShadeModel != GL_FLAT && 100 ctx->Polygon.FrontMode == GL_FILL && 101 ctx->Polygon.BackMode == GL_FILL) { 102 hw_prim = _3DPRIM_TRIFAN; 103 } 104 105 if (hw_prim != brw->primitive) { 106 brw->primitive = hw_prim; 107 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; 108 109 if (reduced_prim[prim->mode] != brw->intel.reduced_primitive) { 110 brw->intel.reduced_primitive = reduced_prim[prim->mode]; 111 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; 112 } 113 } 114} 115 116static void gen6_set_prim(struct brw_context *brw, 117 const struct _mesa_prim *prim) 118{ 119 uint32_t hw_prim = prim_to_hw_prim[prim->mode]; 120 121 DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode)); 122 123 if (hw_prim != brw->primitive) { 124 brw->primitive = hw_prim; 125 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; 126 } 127} 128 129 130static GLuint trim(GLenum prim, GLuint length) 131{ 132 if (prim == GL_QUAD_STRIP) 133 return length > 3 ? 
(length - length % 2) : 0; 134 else if (prim == GL_QUADS) 135 return length - length % 4; 136 else 137 return length; 138} 139 140 141static void brw_emit_prim(struct brw_context *brw, 142 const struct _mesa_prim *prim, 143 uint32_t hw_prim) 144{ 145 struct intel_context *intel = &brw->intel; 146 int verts_per_instance; 147 int vertex_access_type; 148 int start_vertex_location; 149 int base_vertex_location; 150 151 DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 152 prim->start, prim->count); 153 154 start_vertex_location = prim->start; 155 base_vertex_location = prim->basevertex; 156 if (prim->indexed) { 157 vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; 158 start_vertex_location += brw->ib.start_vertex_offset; 159 base_vertex_location += brw->vb.start_vertex_bias; 160 } else { 161 vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; 162 start_vertex_location += brw->vb.start_vertex_bias; 163 } 164 165 verts_per_instance = trim(prim->mode, prim->count); 166 167 /* If nothing to emit, just return. */ 168 if (verts_per_instance == 0) 169 return; 170 171 /* If we're set to always flush, do it before and after the primitive emit. 172 * We want to catch both missed flushes that hurt instruction/state cache 173 * and missed flushes of the render cache as it heads to other parts of 174 * the besides the draw code. 
175 */ 176 if (intel->always_flush_cache) { 177 intel_batchbuffer_emit_mi_flush(intel); 178 } 179 180 BEGIN_BATCH(6); 181 OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | 182 hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | 183 vertex_access_type); 184 OUT_BATCH(verts_per_instance); 185 OUT_BATCH(start_vertex_location); 186 OUT_BATCH(1); // instance count 187 OUT_BATCH(0); // start instance location 188 OUT_BATCH(base_vertex_location); 189 ADVANCE_BATCH(); 190 191 intel->batch.need_workaround_flush = true; 192 193 if (intel->always_flush_cache) { 194 intel_batchbuffer_emit_mi_flush(intel); 195 } 196} 197 198static void gen7_emit_prim(struct brw_context *brw, 199 const struct _mesa_prim *prim, 200 uint32_t hw_prim) 201{ 202 struct intel_context *intel = &brw->intel; 203 int verts_per_instance; 204 int vertex_access_type; 205 int start_vertex_location; 206 int base_vertex_location; 207 208 DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 209 prim->start, prim->count); 210 211 start_vertex_location = prim->start; 212 base_vertex_location = prim->basevertex; 213 if (prim->indexed) { 214 vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM; 215 start_vertex_location += brw->ib.start_vertex_offset; 216 base_vertex_location += brw->vb.start_vertex_bias; 217 } else { 218 vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL; 219 start_vertex_location += brw->vb.start_vertex_bias; 220 } 221 222 verts_per_instance = trim(prim->mode, prim->count); 223 224 /* If nothing to emit, just return. */ 225 if (verts_per_instance == 0) 226 return; 227 228 /* If we're set to always flush, do it before and after the primitive emit. 229 * We want to catch both missed flushes that hurt instruction/state cache 230 * and missed flushes of the render cache as it heads to other parts of 231 * the besides the draw code. 
232 */ 233 if (intel->always_flush_cache) { 234 intel_batchbuffer_emit_mi_flush(intel); 235 } 236 237 BEGIN_BATCH(7); 238 OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); 239 OUT_BATCH(hw_prim | vertex_access_type); 240 OUT_BATCH(verts_per_instance); 241 OUT_BATCH(start_vertex_location); 242 OUT_BATCH(1); // instance count 243 OUT_BATCH(0); // start instance location 244 OUT_BATCH(base_vertex_location); 245 ADVANCE_BATCH(); 246 247 if (intel->always_flush_cache) { 248 intel_batchbuffer_emit_mi_flush(intel); 249 } 250} 251 252 253static void brw_merge_inputs( struct brw_context *brw, 254 const struct gl_client_array *arrays[]) 255{ 256 struct brw_vertex_info old = brw->vb.info; 257 GLuint i; 258 259 for (i = 0; i < brw->vb.nr_buffers; i++) { 260 drm_intel_bo_unreference(brw->vb.buffers[i].bo); 261 brw->vb.buffers[i].bo = NULL; 262 } 263 brw->vb.nr_buffers = 0; 264 265 memset(&brw->vb.info, 0, sizeof(brw->vb.info)); 266 267 for (i = 0; i < VERT_ATTRIB_MAX; i++) { 268 brw->vb.inputs[i].buffer = -1; 269 brw->vb.inputs[i].glarray = arrays[i]; 270 brw->vb.inputs[i].attrib = (gl_vert_attrib) i; 271 272 if (arrays[i]->StrideB != 0) 273 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << 274 ((i%16) * 2); 275 } 276 277 /* Raise statechanges if input sizes have changed. */ 278 if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) 279 brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; 280} 281 282/* May fail if out of video memory for texture or vbo upload, or on 283 * fallback conditions. 
284 */ 285static bool brw_try_draw_prims( struct gl_context *ctx, 286 const struct gl_client_array *arrays[], 287 const struct _mesa_prim *prim, 288 GLuint nr_prims, 289 const struct _mesa_index_buffer *ib, 290 GLuint min_index, 291 GLuint max_index ) 292{ 293 struct intel_context *intel = intel_context(ctx); 294 struct brw_context *brw = brw_context(ctx); 295 bool retval = false; 296 bool warn = false; 297 GLuint i; 298 299 if (ctx->NewState) 300 _mesa_update_state( ctx ); 301 302 /* We have to validate the textures *before* checking for fallbacks; 303 * otherwise, the software fallback won't be able to rely on the 304 * texture state, the firstLevel and lastLevel fields won't be 305 * set in the intel texture object (they'll both be 0), and the 306 * software fallback will segfault if it attempts to access any 307 * texture level other than level 0. 308 */ 309 brw_validate_textures( brw ); 310 311 /* Bind all inputs, derive varying and size information: 312 */ 313 brw_merge_inputs( brw, arrays ); 314 315 brw->ib.ib = ib; 316 brw->state.dirty.brw |= BRW_NEW_INDICES; 317 318 brw->vb.min_index = min_index; 319 brw->vb.max_index = max_index; 320 brw->state.dirty.brw |= BRW_NEW_VERTICES; 321 322 /* Have to validate state quite late. Will rebuild tnl_program, 323 * which depends on varying information. 324 * 325 * Note this is where brw->vs->prog_data.inputs_read is calculated, 326 * so can't access it earlier. 327 */ 328 329 intel_prepare_render(intel); 330 331 for (i = 0; i < nr_prims; i++) { 332 int estimated_max_prim_size; 333 334 estimated_max_prim_size = 512; /* batchbuffer commands */ 335 estimated_max_prim_size += (BRW_MAX_TEX_UNIT * 336 (sizeof(struct brw_sampler_state) + 337 sizeof(struct gen5_sampler_default_color))); 338 estimated_max_prim_size += 1024; /* gen6 VS push constants */ 339 estimated_max_prim_size += 1024; /* gen6 WM push constants */ 340 estimated_max_prim_size += 512; /* misc. 
pad */ 341 342 /* Flush the batch if it's approaching full, so that we don't wrap while 343 * we've got validated state that needs to be in the same batch as the 344 * primitives. 345 */ 346 intel_batchbuffer_require_space(intel, estimated_max_prim_size, false); 347 348 if (intel->gen < 6) 349 brw_set_prim(brw, &prim[i]); 350 else 351 gen6_set_prim(brw, &prim[i]); 352 353 if (brw->state.dirty.brw) { 354 brw_validate_state(brw); 355 356 /* Various fallback checks: */ 357 if (brw->intel.Fallback) 358 goto out; 359 360 /* Check that we can fit our state in with our existing batchbuffer, or 361 * flush otherwise. 362 */ 363 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, 364 brw->state.validated_bo_count)) { 365 static bool warned; 366 intel_batchbuffer_flush(intel); 367 368 /* Validate the state after we flushed the batch (which would have 369 * changed the set of dirty state). If we still fail to 370 * check_aperture, warn of what's happening, but attempt to continue 371 * on since it may succeed anyway, and the user would probably rather 372 * see a failure and a warning than a fallback. 
373 */ 374 brw_validate_state(brw); 375 if (!warned && 376 dri_bufmgr_check_aperture_space(brw->state.validated_bos, 377 brw->state.validated_bo_count)) { 378 warn = true; 379 warned = true; 380 } 381 } 382 383 intel->no_batch_wrap = true; 384 brw_upload_state(brw); 385 } 386 387 if (intel->gen >= 7) 388 gen7_emit_prim(brw, &prim[i], brw->primitive); 389 else 390 brw_emit_prim(brw, &prim[i], brw->primitive); 391 392 intel->no_batch_wrap = false; 393 394 retval = true; 395 } 396 397 if (intel->always_flush_batch) 398 intel_batchbuffer_flush(intel); 399 out: 400 401 brw_state_cache_check_size(brw); 402 403 if (warn) 404 fprintf(stderr, "i965: Single primitive emit potentially exceeded " 405 "available aperture space\n"); 406 407 if (!retval) 408 DBG("%s failed\n", __FUNCTION__); 409 410 return retval; 411} 412 413void brw_draw_prims( struct gl_context *ctx, 414 const struct gl_client_array *arrays[], 415 const struct _mesa_prim *prim, 416 GLuint nr_prims, 417 const struct _mesa_index_buffer *ib, 418 GLboolean index_bounds_valid, 419 GLuint min_index, 420 GLuint max_index ) 421{ 422 bool retval; 423 424 if (!_mesa_check_conditional_render(ctx)) 425 return; 426 427 if (!vbo_all_varyings_in_vbos(arrays)) { 428 if (!index_bounds_valid) 429 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); 430 431 /* Decide if we want to rebase. If so we end up recursing once 432 * only into this function. 433 */ 434 if (min_index != 0 && !vbo_any_varyings_in_vbos(arrays)) { 435 vbo_rebase_prims(ctx, arrays, 436 prim, nr_prims, 437 ib, min_index, max_index, 438 brw_draw_prims ); 439 return; 440 } 441 } 442 443 /* Make a first attempt at drawing: 444 */ 445 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 446 447 /* Otherwise, we really are out of memory. Pass the drawing 448 * command to the software tnl module and which will in turn call 449 * swrast to do the drawing. 
450 */ 451 if (!retval) { 452 _swsetup_Wakeup(ctx); 453 _tnl_wakeup(ctx); 454 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 455 } 456 457} 458 459void brw_draw_init( struct brw_context *brw ) 460{ 461 struct gl_context *ctx = &brw->intel.ctx; 462 struct vbo_context *vbo = vbo_context(ctx); 463 int i; 464 465 /* Register our drawing function: 466 */ 467 vbo->draw_prims = brw_draw_prims; 468 469 for (i = 0; i < VERT_ATTRIB_MAX; i++) 470 brw->vb.inputs[i].buffer = -1; 471 brw->vb.nr_buffers = 0; 472 brw->vb.nr_enabled = 0; 473} 474 475void brw_draw_destroy( struct brw_context *brw ) 476{ 477 int i; 478 479 for (i = 0; i < brw->vb.nr_buffers; i++) { 480 drm_intel_bo_unreference(brw->vb.buffers[i].bo); 481 brw->vb.buffers[i].bo = NULL; 482 } 483 brw->vb.nr_buffers = 0; 484 485 for (i = 0; i < brw->vb.nr_enabled; i++) { 486 brw->vb.enabled[i]->buffer = -1; 487 } 488 brw->vb.nr_enabled = 0; 489 490 drm_intel_bo_unreference(brw->ib.bo); 491 brw->ib.bo = NULL; 492} 493