intel_batchbuffer.c revision 8bd27a5b080157cb1d5fc0383ce45574c7b16aa5
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "gen6 workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}


/* TODO: Push this whole function into bufmgr.
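 *
 * This uploads the locally accumulated commands (and the state placed at the
 * top of the buffer) into the batch BO and submits it to the kernel on the
 * render or blit ring.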
 */
static void
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int ring;

      if (intel->gen < 6 || !batch->is_blit) {
         ring = I915_EXEC_RENDER;
      } else {
         ring = I915_EXEC_BLT;
      }

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      drm_intel_bo_map(batch->bo, false);
      intel_decode(batch->bo->virtual, batch->used,
                   batch->bo->offset,
                   intel->intelScreen->deviceID, GL_TRUE);
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);
}

void
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   if (intel->batch.used == 0)
      return;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer:
    */
   intel_batchbuffer_reset(intel);
}


/* This is the only way buffers get added to the validate list.
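 *
 * Besides recording the relocation with libdrm, the emitters below write the
 * buffer's current presumed offset into the batch, so the kernel can skip
 * patching if the buffer does not end up moving.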
 */
GLboolean
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return GL_TRUE;
}

GLboolean
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return GL_TRUE;
}

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}

/**
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
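 *
 * The function below implements this by emitting the depth stall, depth
 * cache flush, and depth stall PIPE_CONTROLs back to back.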
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
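 *
 * On gen6+ this is a PIPE_CONTROL flushing the write, depth, texture, and
 * instruction caches (or MI_FLUSH_DW when on the blit ring); gen4/5 use a
 * PIPE_CONTROL write flush, and older generations a plain MI_FLUSH.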
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}