intel_batchbuffer.c revision 407785d0e97abd0cc51a6e360089111973748e7c
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <string.h> /* strerror() */

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

/* One entry in the per-batch cache of recently emitted state packets.
 * header is the DWORD offset of the packet within the batch map, and
 * size is its length in bytes. */
struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "gen6 workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
}

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}
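/* Lifecycle sketch (illustrative; assumes a caller holding a valid
 * struct intel_context):
 *
 *    intel_batchbuffer_init(intel);    // once, at context creation
 *    ... emit commands, flushing as needed ...
 *    intel_batchbuffer_free(intel);    // once, at context teardown
 *
 * intel_batchbuffer_reset() runs from init and again after every
 * flush, so callers normally never invoke it directly.
 */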
/* TODO: Push this whole function into bufmgr.
 */
static void
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   if (!intel->intelScreen->no_hw) {
      int ring;

      /* Blit batches must execute on the BLT ring on gen6+; everything
       * else goes to the render ring. */
      if (intel->gen < 6 || !batch->is_blit) {
         ring = I915_EXEC_RENDER;
      } else {
         ring = I915_EXEC_BLT;
      }

      ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
      if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
         ret = drm_intel_bo_subdata(batch->bo,
                                    batch->state_batch_offset,
                                    batch->bo->size - batch->state_batch_offset,
                                    (char *)batch->map + batch->state_batch_offset);
      }

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      drm_intel_bo_map(batch->bo, false);
      intel_decode(batch->bo->virtual, batch->used,
                   batch->bo->offset,
                   intel->intelScreen->deviceID, GL_TRUE);
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);
}

void
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   if (intel->batch.used == 0)
      return;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   if (intel->always_flush_cache) {
      intel_batchbuffer_emit_mi_flush(intel);
   }

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer so the next commands land in a fresh bo. */
   intel_batchbuffer_reset(intel);
}
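/* Space-accounting sketch (illustrative; the helper arithmetic is an
 * assumption based on the companion intel_batchbuffer.h): batch.used
 * counts DWORDs, so the byte length handed to the kernel above is
 * 4 * batch.used, and the room still free for new commands is roughly
 *
 *    state_batch_offset - reserved_space - 4 * used
 *
 * BATCH_RESERVED holds back enough tail space for the
 * MI_BATCH_BUFFER_END (plus the optional MI_NOOP pad) that
 * _intel_batchbuffer_flush() emits.
 */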
/* This is the only way buffers get added to the validate list.
 */
GLboolean
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be,
    * so that if the buffer doesn't move the kernel can short-circuit the
    * relocation processing.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return GL_TRUE;
}

GLboolean
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);
   (void)ret;

   /* Using the old buffer offset, write in what the right data would be,
    * so that if the buffer doesn't move the kernel can short-circuit the
    * relocation processing.
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return GL_TRUE;
}

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}

void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            /* Exact duplicate of a cached packet: drop it by rewinding
             * the batch, and move the entry to the head of the list. */
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         /* Same opcode but different payload: reuse this entry for the
          * new packet. */
         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}
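/* Usage sketch for the packet cache above (illustrative; CMD_FOO and
 * foo_dw1 are hypothetical, and BEGIN_BATCH recording batch.emit is an
 * assumption from the companion header):
 *
 *    BEGIN_BATCH(2);                           // sets batch.emit = batch.used
 *    OUT_BATCH(CMD_FOO | (2 - 2));             // some state packet
 *    OUT_BATCH(foo_dw1);
 *    intel_batchbuffer_cached_advance(intel);  // instead of ADVANCE_BATCH()
 *
 * If an identical packet (same opcode in the high 16 bits of the
 * header, same size and payload) was emitted earlier, batch.used is
 * rewound so the duplicate vanishes, and the matching cache entry is
 * moved to the front so frequently repeated packets are found quickly.
 */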
/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 *   [DevSNB-C+{W/A}] Before any depth stall flush (including those
 *   produced by non-pipelined state commands), software needs to first
 *   send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 *   0.
 *
 *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 *   =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 *   [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 *   BEFORE the pipe-control with a post-sync op and no write-cache
 *   flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([15:14] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use them.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   /* First the CS-stall workaround: stall at the pixel scoreboard. */
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   /* Then the post-sync write itself, into the scratch workaround_bo. */
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             *   Flush Enable =1, a PIPE_CONTROL with any non-zero
             *   post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_NO_WRITE);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }
}
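/* Usage note (illustrative): callers never pick a flush variant by
 * hand.  A single intel_batchbuffer_emit_mi_flush(intel) resolves to
 * MI_FLUSH_DW on the gen6+ blit ring, PIPE_CONTROL on the gen4+ render
 * ring (preceded on gen6 by the post-sync workaround), or the legacy
 * MI_FLUSH on older hardware, exactly as implemented above.
 */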