intel_batchbuffer.c revision 65b096aeddd9b45ca038f44cc9adfff86c8c48b2
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "gen6 workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
   intel->batch.needs_sol_reset = false;
}

void
intel_batchbuffer_save_state(struct intel_context *intel)
{
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct intel_context *intel)
{
   drm_intel_gem_bo_clear_relocs(intel->batch.bo,
                                 intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part
    * of the batchbuffer.  Assume that the caller resets any other state
    * necessary.
    */
   clear_cache(intel);
}
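
/* Illustration only, compiled out: a sketch of how a caller can use the
 * save/reset pair above to emit commands speculatively and roll the batch
 * back on failure.  emit_speculative_state() is a hypothetical stand-in
 * for whatever emit step the caller wants to be able to undo.
 */
#if 0
static bool
try_emit_or_rollback(struct intel_context *intel)
{
   /* Checkpoint the current dword count and relocation count. */
   intel_batchbuffer_save_state(intel);

   if (!emit_speculative_state(intel)) {
      /* Drop the dwords and relocations emitted since the checkpoint. */
      intel_batchbuffer_reset_to_saved(intel);
      return false;
   }

   return true;
}
#endif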
void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}

static void
do_batch_dump(struct intel_context *intel)
{
   struct drm_intel_decode *decode;
   struct intel_batchbuffer *batch = &intel->batch;
   int ret;

   decode = drm_intel_decode_context_alloc(intel->intelScreen->deviceID);
   if (!decode)
      return;

   ret = drm_intel_bo_map(batch->bo, false);
   if (ret == 0) {
      drm_intel_decode_set_batch_pointer(decode,
                                         batch->bo->virtual,
                                         batch->bo->offset,
                                         batch->used);
   } else {
      fprintf(stderr,
              "WARNING: failed to map batchbuffer (%s), "
              "dumping uploaded data instead.\n", strerror(-ret));

      drm_intel_decode_set_batch_pointer(decode,
                                         batch->map,
                                         batch->bo->offset,
                                         batch->used);
   }

   drm_intel_decode(decode);

   drm_intel_decode_context_free(decode);

   if (ret == 0) {
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }
}

/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int flags;

      if (intel->gen < 6 || !batch->is_blit) {
         flags = I915_EXEC_RENDER;
      } else {
         flags = I915_EXEC_BLT;
      }

      if (batch->needs_sol_reset)
         flags |= I915_EXEC_GEN7_SOL_RESET;

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0,
                                     flags);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      do_batch_dump(intel);

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);

   return ret;
}
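
/* Illustration only, compiled out: the ring selection in do_flush_locked()
 * restated as a small pure helper.  Before gen6 every batch is executed on
 * the render ring; on gen6+ blit batches are submitted to the BLT ring, and
 * I915_EXEC_GEN7_SOL_RESET asks the kernel to reset the gen7 streamed-output
 * (transform feedback) write offsets.  This helper is a restatement for
 * clarity, not used by the driver.
 */
#if 0
static int
exec_flags_for_batch(const struct intel_context *intel,
                     const struct intel_batchbuffer *batch)
{
   int flags = (intel->gen >= 6 && batch->is_blit) ? I915_EXEC_BLT
                                                   : I915_EXEC_RENDER;

   if (batch->needs_sol_reset)
      flags |= I915_EXEC_GEN7_SOL_RESET;

   return flags;
}
#endif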
int
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   int ret;

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer. */
   intel_batchbuffer_reset(intel);

   return ret;
}


/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}
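
/* Illustration only, compiled out: a sketch of the intended calling pattern
 * for the dedup cache above.  The caller records the start of a packet in
 * batch.emit, writes the dwords, then calls
 * intel_batchbuffer_cached_advance(); if a byte-identical packet with the
 * same opcode (high 16 bits of the first dword) was emitted before, the new
 * copy is rewound away.  In the real driver BEGIN_BATCH() records
 * batch.emit; this open-codes the steps for clarity, and the helper name is
 * hypothetical.
 */
#if 0
static void
emit_packet_cached(struct intel_context *intel, uint32_t header, uint32_t arg)
{
   intel_batchbuffer_require_space(intel, 2 * sizeof(uint32_t), false);

   intel->batch.emit = intel->batch.used;       /* packet starts here */
   intel_batchbuffer_emit_dword(intel, header); /* opcode in bits 31:16 */
   intel_batchbuffer_emit_dword(intel, arg);

   intel_batchbuffer_cached_advance(intel);     /* rewind if duplicate */
}
#endif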
/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}
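
/* Illustration only, compiled out: the stall / flush / stall triple above
 * reduced to a table-driven loop over the three PIPE_CONTROL DW1 bit
 * patterns.  Behaviorally the same sequence; the driver keeps the unrolled
 * form, and this compact restatement is hypothetical.
 */
#if 0
static void
emit_depth_stall_flushes_compact(struct intel_context *intel)
{
   static const uint32_t dw1[3] = {
      PIPE_CONTROL_DEPTH_STALL,       /* pipelined depth stall */
      PIPE_CONTROL_DEPTH_CACHE_FLUSH, /* pipelined depth cache flush */
      PIPE_CONTROL_DEPTH_STALL,       /* second pipelined depth stall */
   };
   int i;

   for (i = 0; i < 3; i++) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL);
      OUT_BATCH(dw1[i]);
      OUT_BATCH(0); /* no write address */
      OUT_BATCH(0); /* no write data */
      ADVANCE_BATCH();
   }
}
#endif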
/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}
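
/* Illustration only, compiled out: a typical render-to-texture caller of
 * intel_batchbuffer_emit_mi_flush().  After drawing to an FBO attachment,
 * the render cache must be flushed before the same buffer is sampled as a
 * texture.  The helper name below is hypothetical; only
 * intel_batchbuffer_emit_mi_flush() is real.
 */
#if 0
static void
finish_render_to_texture(struct intel_context *intel)
{
   /* Make prior render-target writes visible to the sampler. */
   intel_batchbuffer_emit_mi_flush(intel);

   /* ...rebind the texture and continue drawing... */
}
#endif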