intel_batchbuffer.c revision 2e5a1a254ed81b1d3efa6064f48183eefac784d0
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

/* Cache of recently emitted state packets, keyed by the opcode in the high
 * 16 bits of the packet's header DWORD; see
 * intel_batchbuffer_cached_advance() below.
 */
struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header; /* DWORD offset of the packet within the batch map */
   uint16_t size;   /* packet size in bytes */
};

static void
clear_cache(struct intel_context *intel)
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "gen6 workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}
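/* A quick lifecycle sketch (illustrative pseudocode, not part of this
 * file): the batchbuffer is created once per context, refilled after every
 * submission by intel_batchbuffer_reset() above, and torn down with the
 * context.
 *
 *    intel_batchbuffer_init(intel);      at context creation
 *    ... emit commands ...
 *    intel_batchbuffer_flush(intel);     submit; reset() starts a fresh BO
 *    intel_batchbuffer_free(intel);      at context destruction
 */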
/* TODO: Push this whole function into bufmgr.
 */
static void
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int ring;

      if (intel->gen < 6 || !batch->is_blit) {
         ring = I915_EXEC_RENDER;
      } else {
         ring = I915_EXEC_BLT;
      }

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      drm_intel_bo_map(batch->bo, false);
      intel_decode(batch->bo->virtual, batch->used,
                   batch->bo->offset,
                   intel->intelScreen->deviceID, true);
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);
}

void
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   /* No batch should be emitted that uses a mapped region, because that
    * would cause the map to be incoherent with GPU rendering done by the
    * batchbuffer. To ensure that condition, we assert a condition that is
    * stronger but easier to implement: that *no* region is mapped.
    */
   assert(intel->num_mapped_regions == 0);

   if (intel->batch.used == 0)
      return;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer:
    */
   intel_batchbuffer_reset(intel);
}
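/* A minimal sketch (hypothetical helper, not used by the driver) of the
 * unit convention the flush path above relies on: batch.used counts
 * DWORDs, which is why every byte size handed to libdrm is 4*batch.used.
 * Assumes the emit helpers declared in intel_batchbuffer.h.
 */
static inline void
example_emit_pad_noop(struct intel_context *intel)
{
   intel_batchbuffer_require_space(intel, 4, false); /* 4 bytes == 1 DWORD */
   intel_batchbuffer_emit_dword(intel, MI_NOOP);     /* batch.used += 1 */
}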
/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}
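/* A sketch of how intel_batchbuffer_cached_advance() is meant to be called
 * (hypothetical caller; real emitters may go through a wrapper macro in
 * intel_batchbuffer.h). BEGIN_BATCH records the packet start in
 * batch.emit; calling cached_advance() in place of ADVANCE_BATCH() then
 * rewinds batch.used whenever a byte-identical packet with the same opcode
 * was emitted earlier. The packet below is meaningless to the hardware and
 * exists purely to show the control flow.
 */
static inline void
example_emit_cached_packet(struct intel_context *intel, uint32_t payload)
{
   BEGIN_BATCH(2);                           /* sets batch.emit = batch.used */
   OUT_BATCH(MI_NOOP);                       /* high 16 bits of DW0 key the cache */
   OUT_BATCH(payload);
   intel_batchbuffer_cached_advance(intel);  /* dedups instead of advancing */
}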
/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6. From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 *     [DevSNB-C+{W/A}] Before any depth stall flush (including those
 *     produced by non-pipelined state commands), software needs to first
 *     send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 *     0.
 *
 *     [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 *     =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 *     [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 *     BEFORE the pipe-control with a post-sync op and no write-cache
 *     flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}
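/* A sketch (hypothetical helper, assuming a gen6/gen7 caller) of how the
 * two workarounds above combine: per the [DevSNB-C+{W/A}] note, the
 * post-sync-nonzero PIPE_CONTROL has to land before any depth stall flush,
 * so a depth/stencil state change on gen6 looks roughly like this.
 */
static inline void
example_prepare_depth_state_change(struct intel_context *intel)
{
   if (intel->gen == 6)
      intel_emit_post_sync_nonzero_flush(intel);
   intel_emit_depth_stall_flushes(intel);
}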
/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}
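/* A closing usage sketch (hypothetical helper): packets that reference
 * another buffer object go through intel_batchbuffer_emit_reloc(), here
 * via the OUT_RELOC() wrapper, which records the relocation with libdrm
 * and writes the buffer's presumed offset into the batch.  This mirrors
 * the PIPE_CONTROL write in intel_emit_post_sync_nonzero_flush() above.
 */
static inline void
example_emit_immediate_write(struct intel_context *intel, drm_intel_bo *bo)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* immediate data written by the GPU */
   ADVANCE_BATCH();
}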