brw_draw.c revision 605d428d20819ac3f46aaeb4a66707febec7ded2
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <stdlib.h> 29 30#include "glheader.h" 31#include "context.h" 32#include "state.h" 33#include "api_validate.h" 34#include "enums.h" 35 36#include "brw_draw.h" 37#include "brw_defines.h" 38#include "brw_context.h" 39#include "brw_aub.h" 40#include "brw_state.h" 41#include "brw_fallback.h" 42 43#include "intel_ioctl.h" 44#include "intel_batchbuffer.h" 45#include "intel_buffer_objects.h" 46 47#include "tnl/tnl.h" 48#include "vbo/vbo_context.h" 49 50 51 52 53static GLuint hw_prim[GL_POLYGON+1] = { 54 _3DPRIM_POINTLIST, 55 _3DPRIM_LINELIST, 56 _3DPRIM_LINELOOP, 57 _3DPRIM_LINESTRIP, 58 _3DPRIM_TRILIST, 59 _3DPRIM_TRISTRIP, 60 _3DPRIM_TRIFAN, 61 _3DPRIM_QUADLIST, 62 _3DPRIM_QUADSTRIP, 63 _3DPRIM_POLYGON 64}; 65 66 67static const GLenum reduced_prim[GL_POLYGON+1] = { 68 GL_POINTS, 69 GL_LINES, 70 GL_LINES, 71 GL_LINES, 72 GL_TRIANGLES, 73 GL_TRIANGLES, 74 GL_TRIANGLES, 75 GL_TRIANGLES, 76 GL_TRIANGLES, 77 GL_TRIANGLES 78}; 79 80 81/* When the primitive changes, set a state bit and re-validate. Not 82 * the nicest and would rather deal with this by having all the 83 * programs be immune to the active primitive (ie. cope with all 84 * possibilities). That may not be realistic however. 85 */ 86static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) 87{ 88 if (INTEL_DEBUG & DEBUG_PRIMS) 89 _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); 90 91 /* Slight optimization to avoid the GS program when not needed: 92 */ 93 if (prim == GL_QUAD_STRIP && 94 brw->attribs.Light->ShadeModel != GL_FLAT && 95 brw->attribs.Polygon->FrontMode == GL_FILL && 96 brw->attribs.Polygon->BackMode == GL_FILL) 97 prim = GL_TRIANGLE_STRIP; 98 99 if (prim != brw->primitive) { 100 brw->primitive = prim; 101 brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; 102 103 if (reduced_prim[prim] != brw->intel.reduced_primitive) { 104 brw->intel.reduced_primitive = reduced_prim[prim]; 105 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; 106 } 107 108 brw_validate_state(brw); 109 } 110 111 return hw_prim[prim]; 112} 113 114 115static GLuint trim(GLenum prim, GLuint length) 116{ 117 if (prim == GL_QUAD_STRIP) 118 return length > 3 ? (length - length % 2) : 0; 119 else if (prim == GL_QUADS) 120 return length - length % 4; 121 else 122 return length; 123} 124 125 126static void brw_emit_cliprect( struct brw_context *brw, 127 const drm_clip_rect_t *rect ) 128{ 129 struct brw_drawrect bdr; 130 131 bdr.header.opcode = CMD_DRAW_RECT; 132 bdr.header.length = sizeof(bdr)/4 - 2; 133 bdr.xmin = rect->x1; 134 bdr.xmax = rect->x2 - 1; 135 bdr.ymin = rect->y1; 136 bdr.ymax = rect->y2 - 1; 137 bdr.xorg = brw->intel.drawX; 138 bdr.yorg = brw->intel.drawY; 139 140 intel_batchbuffer_data( brw->intel.batch, &bdr, sizeof(bdr), 141 INTEL_BATCH_NO_CLIPRECTS); 142} 143 144 145static void brw_emit_prim( struct brw_context *brw, 146 const struct _mesa_prim *prim ) 147 148{ 149 struct brw_3d_primitive prim_packet; 150 151 if (INTEL_DEBUG & DEBUG_PRIMS) 152 _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 153 prim->start, prim->count); 154 155 prim_packet.header.opcode = CMD_3D_PRIM; 156 prim_packet.header.length = sizeof(prim_packet)/4 - 2; 157 prim_packet.header.pad = 0; 158 prim_packet.header.topology = brw_set_prim(brw, prim->mode); 159 prim_packet.header.indexed = prim->indexed; 160 161 prim_packet.verts_per_instance = trim(prim->mode, prim->count); 162 prim_packet.start_vert_location = prim->start; 163 prim_packet.instance_count = 1; 164 prim_packet.start_instance_location = 0; 165 prim_packet.base_vert_location = 0; 166 167 if (prim_packet.verts_per_instance) { 168 intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), 169 INTEL_BATCH_NO_CLIPRECTS); 170 } 171} 172 173static void brw_merge_inputs( struct brw_context *brw, 174 const struct gl_client_array *arrays[]) 175{ 176 struct brw_vertex_element *inputs = brw->vb.inputs; 177 struct brw_vertex_info old = brw->vb.info; 178 GLuint i; 179 180 memset(inputs, 0, sizeof(*inputs)); 181 memset(&brw->vb.info, 0, sizeof(brw->vb.info)); 182 183 for (i = 0; i < VERT_ATTRIB_MAX; i++) { 184 brw->vb.inputs[i].glarray = arrays[i]; 185 186 /* XXX: metaops passes null arrays */ 187 if (arrays[i]) { 188 if (arrays[i]->StrideB != 0) 189 brw->vb.info.varying |= 1 << i; 190 191 brw->vb.info.sizes[i/16] |= (inputs[i].glarray->Size - 1) << ((i%16) * 2); 192 } 193 } 194 195 /* Raise statechanges if input sizes and varying have changed: 196 */ 197 if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) 198 brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; 199 200 if (brw->vb.info.varying != old.varying) 201 brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING; 202} 203 204/* XXX: could split the primitive list to fallback only on the 205 * non-conformant primitives. 206 */ 207static GLboolean check_fallbacks( struct brw_context *brw, 208 const struct _mesa_prim *prim, 209 GLuint nr_prims ) 210{ 211 GLuint i; 212 213 if (!brw->intel.strict_conformance) 214 return GL_FALSE; 215 216 if (brw->attribs.Polygon->SmoothFlag) { 217 for (i = 0; i < nr_prims; i++) 218 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 219 return GL_TRUE; 220 } 221 222 /* BRW hardware will do AA lines, but they are non-conformant it 223 * seems. TBD whether we keep this fallback: 224 */ 225 if (brw->attribs.Line->SmoothFlag) { 226 for (i = 0; i < nr_prims; i++) 227 if (reduced_prim[prim[i].mode] == GL_LINES) 228 return GL_TRUE; 229 } 230 231 /* Stipple -- these fallbacks could be resolved with a little 232 * bit of work? 233 */ 234 if (brw->attribs.Line->StippleFlag) { 235 for (i = 0; i < nr_prims; i++) { 236 /* GS doesn't get enough information to know when to reset 237 * the stipple counter?!? 238 */ 239 if (prim[i].mode == GL_LINE_LOOP) 240 return GL_TRUE; 241 242 if (prim[i].mode == GL_POLYGON && 243 (brw->attribs.Polygon->FrontMode == GL_LINE || 244 brw->attribs.Polygon->BackMode == GL_LINE)) 245 return GL_TRUE; 246 } 247 } 248 249 250 if (brw->attribs.Point->SmoothFlag) { 251 for (i = 0; i < nr_prims; i++) 252 if (prim[i].mode == GL_POINTS) 253 return GL_TRUE; 254 } 255 256 return GL_FALSE; 257} 258 259/* May fail if out of video memory for texture or vbo upload, or on 260 * fallback conditions. 261 */ 262static GLboolean brw_try_draw_prims( GLcontext *ctx, 263 const struct gl_client_array *arrays[], 264 const struct _mesa_prim *prim, 265 GLuint nr_prims, 266 const struct _mesa_index_buffer *ib, 267 GLuint min_index, 268 GLuint max_index ) 269{ 270 struct intel_context *intel = intel_context(ctx); 271 struct brw_context *brw = brw_context(ctx); 272 GLboolean retval = GL_FALSE; 273 GLuint i, j; 274 275 if (ctx->NewState) 276 _mesa_update_state( ctx ); 277 278 /* Bind all inputs, derive varying and size information: 279 */ 280 brw_merge_inputs( brw, arrays ); 281 282 /* Have to validate state quite late. Will rebuild tnl_program, 283 * which depends on varying information. 284 * 285 * Note this is where brw->vs->prog_data.inputs_read is calculated, 286 * so can't access it earlier. 287 */ 288 289 LOCK_HARDWARE(intel); 290 291 if (brw->intel.numClipRects == 0) { 292 assert(intel->batch->ptr == intel->batch->map + intel->batch->offset); 293 UNLOCK_HARDWARE(intel); 294 return GL_TRUE; 295 } 296 297 { 298 /* Set the first primitive early, ahead of validate_state: 299 */ 300 brw_set_prim(brw, prim[0].mode); 301 302 /* XXX: Need to separate validate and upload of state. 303 */ 304 brw_validate_state( brw ); 305 306 /* Various fallback checks: 307 */ 308 if (brw->intel.Fallback) 309 goto out; 310 311 if (check_fallbacks( brw, prim, nr_prims )) 312 goto out; 313 314 /* Upload index, vertex data: 315 */ 316 if (ib) 317 brw_upload_indices( brw, ib ); 318 319 if (!brw_upload_vertices( brw, min_index, max_index)) { 320 goto out; 321 } 322 323 /* For single cliprect, state is already emitted: 324 */ 325 if (brw->intel.numClipRects == 1) { 326 for (i = 0; i < nr_prims; i++) { 327 brw_emit_prim(brw, &prim[i]); 328 } 329 } 330 else { 331 /* Otherwise, explicitly do the cliprects at this point: 332 */ 333 for (j = 0; j < brw->intel.numClipRects; j++) { 334 brw_emit_cliprect(brw, &brw->intel.pClipRects[j]); 335 336 /* Emit prims to batchbuffer: 337 */ 338 for (i = 0; i < nr_prims; i++) { 339 brw_emit_prim(brw, &prim[i]); 340 } 341 } 342 } 343 344 intel->need_flush = GL_TRUE; 345 retval = GL_TRUE; 346 } 347 348 out: 349 350 /* Currently have to do this to synchronize with the map/unmap of 351 * the vertex buffer in brw_exec_api.c. Not sure if there is any 352 * way around this, as not every flush is due to a buffer filling 353 * up. 354 */ 355 if (!intel_batchbuffer_flush( brw->intel.batch )) { 356 DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__); 357 retval = GL_FALSE; 358 } 359 360 if (retval && intel->thrashing) { 361 bmSetFence(intel); 362 } 363 364 /* Free any old data so it doesn't clog up texture memory - we 365 * won't be referencing it again. 366 */ 367 while (brw->vb.upload.wrap != brw->vb.upload.buf) { 368 ctx->Driver.BufferData(ctx, 369 GL_ARRAY_BUFFER_ARB, 370 BRW_UPLOAD_INIT_SIZE, 371 NULL, 372 GL_DYNAMIC_DRAW_ARB, 373 brw->vb.upload.vbo[brw->vb.upload.wrap]); 374 brw->vb.upload.wrap++; 375 brw->vb.upload.wrap %= BRW_NR_UPLOAD_BUFS; 376 } 377 378 UNLOCK_HARDWARE(intel); 379 380 if (!retval) 381 DBG("%s failed\n", __FUNCTION__); 382 383 return retval; 384} 385 386static GLboolean brw_need_rebase( GLcontext *ctx, 387 const struct gl_client_array *arrays[], 388 const struct _mesa_index_buffer *ib, 389 GLuint min_index ) 390{ 391 if (min_index == 0) 392 return GL_FALSE; 393 394 if (ib) { 395 if (!vbo_all_varyings_in_vbos(arrays)) 396 return GL_TRUE; 397 else 398 return GL_FALSE; 399 } 400 else { 401 /* Hmm. This isn't quite what I wanted. BRW can actually 402 * handle the mixed case well enough that we shouldn't need to 403 * rebase. However, it's probably not very common, nor hugely 404 * expensive to do it this way: 405 */ 406 if (!vbo_all_varyings_in_vbos(arrays)) 407 return GL_TRUE; 408 else 409 return GL_FALSE; 410 } 411} 412 413 414void brw_draw_prims( GLcontext *ctx, 415 const struct gl_client_array *arrays[], 416 const struct _mesa_prim *prim, 417 GLuint nr_prims, 418 const struct _mesa_index_buffer *ib, 419 GLuint min_index, 420 GLuint max_index ) 421{ 422 struct intel_context *intel = intel_context(ctx); 423 GLboolean retval; 424 425 /* Decide if we want to rebase. If so we end up recursing once 426 * only into this function. 427 */ 428 if (brw_need_rebase( ctx, arrays, ib, min_index )) { 429 vbo_rebase_prims( ctx, arrays, 430 prim, nr_prims, 431 ib, min_index, max_index, 432 brw_draw_prims ); 433 434 return; 435 } 436 437 438 /* Make a first attempt at drawing: 439 */ 440 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 441 442 443 /* This looks like out-of-memory but potentially we have 444 * situation where there is enough memory but it has become 445 * fragmented. Clear out all heaps and start from scratch by 446 * faking a contended lock event: (done elsewhere) 447 */ 448 if (!retval && !intel->Fallback && bmError(intel)) { 449 DBG("retrying\n"); 450 /* Then try a second time only to upload textures and draw the 451 * primitives: 452 */ 453 retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 454 } 455 456 /* Otherwise, we really are out of memory. Pass the drawing 457 * command to the software tnl module and which will in turn call 458 * swrast to do the drawing. 459 */ 460 if (!retval) { 461 _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); 462 } 463 464 if (intel->aub_file && (INTEL_DEBUG & DEBUG_SYNC)) { 465 intelFinish( &intel->ctx ); 466 intel->aub_wrap = 1; 467 } 468} 469 470 471static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr ) 472{ 473 /* nothing to do, we don't rely on the contents being preserved */ 474} 475 476 477void brw_draw_init( struct brw_context *brw ) 478{ 479 GLcontext *ctx = &brw->intel.ctx; 480 struct vbo_context *vbo = vbo_context(ctx); 481 GLuint i; 482 483 /* Register our drawing function: 484 */ 485 vbo->draw_prims = brw_draw_prims; 486 487 brw->vb.upload.size = BRW_UPLOAD_INIT_SIZE; 488 489 for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) { 490 brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB); 491 492 /* NOTE: These are set to no-backing-store. 493 */ 494 bmBufferSetInvalidateCB(&brw->intel, 495 intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])), 496 brw_invalidate_vbo_cb, 497 &brw->intel, 498 GL_TRUE); 499 } 500 501 ctx->Driver.BufferData( ctx, 502 GL_ARRAY_BUFFER_ARB, 503 BRW_UPLOAD_INIT_SIZE, 504 NULL, 505 GL_DYNAMIC_DRAW_ARB, 506 brw->vb.upload.vbo[0] ); 507} 508 509void brw_draw_destroy( struct brw_context *brw ) 510{ 511 GLcontext *ctx = &brw->intel.ctx; 512 GLuint i; 513 514 for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) 515 ctx->Driver.DeleteBuffer(ctx, brw->vb.upload.vbo[i]); 516} 517