/* intel_batchbuffer.c — revision 0a00a9a05b357dafae86bf8af879aa601f101eba */
1/************************************************************************** 2 * 3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28#include "intel_context.h" 29#include "intel_batchbuffer.h" 30#include "intel_buffer_objects.h" 31#include "intel_decode.h" 32#include "intel_reg.h" 33#include "intel_bufmgr.h" 34#include "intel_buffers.h" 35 36struct cached_batch_item { 37 struct cached_batch_item *next; 38 uint16_t header; 39 uint16_t size; 40}; 41 42static void clear_cache( struct intel_context *intel ) 43{ 44 struct cached_batch_item *item = intel->batch.cached_items; 45 46 while (item) { 47 struct cached_batch_item *next = item->next; 48 free(item); 49 item = next; 50 } 51 52 intel->batch.cached_items = NULL; 53} 54 55void 56intel_batchbuffer_init(struct intel_context *intel) 57{ 58 intel_batchbuffer_reset(intel); 59 60 if (intel->gen == 6) { 61 /* We can't just use brw_state_batch to get a chunk of space for 62 * the gen6 workaround because it involves actually writing to 63 * the buffer, and the kernel doesn't let us write to the batch. 64 */ 65 intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr, 66 "gen6 workaround", 67 4096, 4096); 68 } 69} 70 71void 72intel_batchbuffer_reset(struct intel_context *intel) 73{ 74 if (intel->batch.last_bo != NULL) { 75 drm_intel_bo_unreference(intel->batch.last_bo); 76 intel->batch.last_bo = NULL; 77 } 78 intel->batch.last_bo = intel->batch.bo; 79 80 clear_cache(intel); 81 82 intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", 83 intel->maxBatchSize, 4096); 84 85 intel->batch.reserved_space = BATCH_RESERVED; 86 intel->batch.state_batch_offset = intel->batch.bo->size; 87 intel->batch.used = 0; 88} 89 90void 91intel_batchbuffer_free(struct intel_context *intel) 92{ 93 drm_intel_bo_unreference(intel->batch.last_bo); 94 drm_intel_bo_unreference(intel->batch.bo); 95 drm_intel_bo_unreference(intel->batch.workaround_bo); 96 clear_cache(intel); 97} 98 99 100/* TODO: Push this whole function into bufmgr. 
101 */ 102static void 103do_flush_locked(struct intel_context *intel) 104{ 105 struct intel_batchbuffer *batch = &intel->batch; 106 int ret = 0; 107 108 ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); 109 if (ret == 0 && batch->state_batch_offset != batch->bo->size) { 110 ret = drm_intel_bo_subdata(batch->bo, 111 batch->state_batch_offset, 112 batch->bo->size - batch->state_batch_offset, 113 (char *)batch->map + batch->state_batch_offset); 114 } 115 116 if (!intel->intelScreen->no_hw) { 117 int ring; 118 119 if (intel->gen < 6 || !batch->is_blit) { 120 ring = I915_EXEC_RENDER; 121 } else { 122 ring = I915_EXEC_BLT; 123 } 124 125 if (ret == 0) 126 ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring); 127 } 128 129 if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { 130 drm_intel_bo_map(batch->bo, false); 131 intel_decode(batch->bo->virtual, batch->used, 132 batch->bo->offset, 133 intel->intelScreen->deviceID, GL_TRUE); 134 drm_intel_bo_unmap(batch->bo); 135 136 if (intel->vtbl.debug_batch != NULL) 137 intel->vtbl.debug_batch(intel); 138 } 139 140 if (ret != 0) { 141 fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(ret)); 142 exit(1); 143 } 144 intel->vtbl.new_batch(intel); 145} 146 147void 148_intel_batchbuffer_flush(struct intel_context *intel, 149 const char *file, int line) 150{ 151 if (intel->batch.used == 0) 152 return; 153 154 if (intel->first_post_swapbuffers_batch == NULL) { 155 intel->first_post_swapbuffers_batch = intel->batch.bo; 156 drm_intel_bo_reference(intel->first_post_swapbuffers_batch); 157 } 158 159 if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) 160 fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, 161 4*intel->batch.used); 162 163 intel->batch.reserved_space = 0; 164 165 if (intel->always_flush_cache) { 166 intel_batchbuffer_emit_mi_flush(intel); 167 } 168 169 /* Mark the end of the buffer. 
*/ 170 intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END); 171 if (intel->batch.used & 1) { 172 /* Round batchbuffer usage to 2 DWORDs. */ 173 intel_batchbuffer_emit_dword(intel, MI_NOOP); 174 } 175 176 if (intel->vtbl.finish_batch) 177 intel->vtbl.finish_batch(intel); 178 179 intel_upload_finish(intel); 180 181 /* Check that we didn't just wrap our batchbuffer at a bad time. */ 182 assert(!intel->no_batch_wrap); 183 184 do_flush_locked(intel); 185 186 if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { 187 fprintf(stderr, "waiting for idle\n"); 188 drm_intel_bo_wait_rendering(intel->batch.bo); 189 } 190 191 /* Reset the buffer: 192 */ 193 intel_batchbuffer_reset(intel); 194} 195 196 197/* This is the only way buffers get added to the validate list. 198 */ 199GLboolean 200intel_batchbuffer_emit_reloc(struct intel_context *intel, 201 drm_intel_bo *buffer, 202 uint32_t read_domains, uint32_t write_domain, 203 uint32_t delta) 204{ 205 int ret; 206 207 ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used, 208 buffer, delta, 209 read_domains, write_domain); 210 assert(ret == 0); 211 (void)ret; 212 213 /* 214 * Using the old buffer offset, write in what the right data would be, in case 215 * the buffer doesn't move and we can short-circuit the relocation processing 216 * in the kernel 217 */ 218 intel_batchbuffer_emit_dword(intel, buffer->offset + delta); 219 220 return GL_TRUE; 221} 222 223GLboolean 224intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, 225 drm_intel_bo *buffer, 226 uint32_t read_domains, 227 uint32_t write_domain, 228 uint32_t delta) 229{ 230 int ret; 231 232 ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used, 233 buffer, delta, 234 read_domains, write_domain); 235 assert(ret == 0); 236 (void)ret; 237 238 /* 239 * Using the old buffer offset, write in what the right data would 240 * be, in case the buffer doesn't move and we can short-circuit the 241 * relocation processing in the kernel 242 */ 243 
intel_batchbuffer_emit_dword(intel, buffer->offset + delta); 244 245 return GL_TRUE; 246} 247 248void 249intel_batchbuffer_data(struct intel_context *intel, 250 const void *data, GLuint bytes, bool is_blit) 251{ 252 assert((bytes & 3) == 0); 253 intel_batchbuffer_require_space(intel, bytes, is_blit); 254 __memcpy(intel->batch.map + intel->batch.used, data, bytes); 255 intel->batch.used += bytes >> 2; 256} 257 258void 259intel_batchbuffer_cached_advance(struct intel_context *intel) 260{ 261 struct cached_batch_item **prev = &intel->batch.cached_items, *item; 262 uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t); 263 uint32_t *start = intel->batch.map + intel->batch.emit; 264 uint16_t op = *start >> 16; 265 266 while (*prev) { 267 uint32_t *old; 268 269 item = *prev; 270 old = intel->batch.map + item->header; 271 if (op == *old >> 16) { 272 if (item->size == sz && memcmp(old, start, sz) == 0) { 273 if (prev != &intel->batch.cached_items) { 274 *prev = item->next; 275 item->next = intel->batch.cached_items; 276 intel->batch.cached_items = item; 277 } 278 intel->batch.used = intel->batch.emit; 279 return; 280 } 281 282 goto emit; 283 } 284 prev = &item->next; 285 } 286 287 item = malloc(sizeof(struct cached_batch_item)); 288 if (item == NULL) 289 return; 290 291 item->next = intel->batch.cached_items; 292 intel->batch.cached_items = item; 293 294emit: 295 item->size = sz; 296 item->header = intel->batch.emit; 297} 298 299/** 300 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for 301 * implementing two workarounds on gen6. From section 1.4.7.1 302 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: 303 * 304 * [DevSNB-C+{W/A}] Before any depth stall flush (including those 305 * produced by non-pipelined state commands), software needs to first 306 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 307 * 0. 
308 * 309 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable 310 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. 311 * 312 * And the workaround for these two requires this workaround first: 313 * 314 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent 315 * BEFORE the pipe-control with a post-sync op and no write-cache 316 * flushes. 317 * 318 * And this last workaround is tricky because of the requirements on 319 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM 320 * volume 2 part 1: 321 * 322 * "1 of the following must also be set: 323 * - Render Target Cache Flush Enable ([12] of DW1) 324 * - Depth Cache Flush Enable ([0] of DW1) 325 * - Stall at Pixel Scoreboard ([1] of DW1) 326 * - Depth Stall ([13] of DW1) 327 * - Post-Sync Operation ([13] of DW1) 328 * - Notify Enable ([8] of DW1)" 329 * 330 * The cache flushes require the workaround flush that triggered this 331 * one, so we can't use it. Depth stall would trigger the same. 332 * Post-sync nonzero is what triggered this second workaround, so we 333 * can't use that one either. Notify enable is IRQs, which aren't 334 * really our business. That leaves only stall at scoreboard. 
335 */ 336void 337intel_emit_post_sync_nonzero_flush(struct intel_context *intel) 338{ 339 if (!intel->batch.need_workaround_flush) 340 return; 341 342 BEGIN_BATCH(4); 343 OUT_BATCH(_3DSTATE_PIPE_CONTROL); 344 OUT_BATCH(PIPE_CONTROL_CS_STALL | 345 PIPE_CONTROL_STALL_AT_SCOREBOARD); 346 OUT_BATCH(0); /* address */ 347 OUT_BATCH(0); /* write data */ 348 ADVANCE_BATCH(); 349 350 BEGIN_BATCH(4); 351 OUT_BATCH(_3DSTATE_PIPE_CONTROL); 352 OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); 353 OUT_RELOC(intel->batch.workaround_bo, 354 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); 355 OUT_BATCH(0); /* write data */ 356 ADVANCE_BATCH(); 357 358 intel->batch.need_workaround_flush = false; 359} 360 361/* Emit a pipelined flush to either flush render and texture cache for 362 * reading from a FBO-drawn texture, or flush so that frontbuffer 363 * render appears on the screen in DRI1. 364 * 365 * This is also used for the always_flush_cache driconf debug option. 366 */ 367void 368intel_batchbuffer_emit_mi_flush(struct intel_context *intel) 369{ 370 if (intel->gen >= 6) { 371 if (intel->batch.is_blit) { 372 BEGIN_BATCH_BLT(4); 373 OUT_BATCH(MI_FLUSH_DW); 374 OUT_BATCH(0); 375 OUT_BATCH(0); 376 OUT_BATCH(0); 377 ADVANCE_BATCH(); 378 } else { 379 if (intel->gen == 6) { 380 /* Hardware workaround: SNB B-Spec says: 381 * 382 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache 383 * Flush Enable =1, a PIPE_CONTROL with any non-zero 384 * post-sync-op is required. 
385 */ 386 intel_emit_post_sync_nonzero_flush(intel); 387 } 388 389 BEGIN_BATCH(4); 390 OUT_BATCH(_3DSTATE_PIPE_CONTROL); 391 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | 392 PIPE_CONTROL_WRITE_FLUSH | 393 PIPE_CONTROL_DEPTH_CACHE_FLUSH | 394 PIPE_CONTROL_TC_FLUSH | 395 PIPE_CONTROL_NO_WRITE); 396 OUT_BATCH(0); /* write address */ 397 OUT_BATCH(0); /* write data */ 398 ADVANCE_BATCH(); 399 } 400 } else if (intel->gen >= 4) { 401 BEGIN_BATCH(4); 402 OUT_BATCH(_3DSTATE_PIPE_CONTROL | 403 PIPE_CONTROL_WRITE_FLUSH | 404 PIPE_CONTROL_NO_WRITE); 405 OUT_BATCH(0); /* write address */ 406 OUT_BATCH(0); /* write data */ 407 OUT_BATCH(0); /* write data */ 408 ADVANCE_BATCH(); 409 } else { 410 BEGIN_BATCH(1); 411 OUT_BATCH(MI_FLUSH); 412 ADVANCE_BATCH(); 413 } 414} 415