radeon_drm_cs.c revision 076db67217741aa820feadccc66067516d4cf4ca
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Adding buffers and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_buffer(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply or'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_buffer. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_buffer.
*/
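/*
    A minimal driver-side sketch of the add_buffer/validate/flush pattern
    described above -- illustrative only, not part of this file. The names
    "tex", "priority" and "emit_textured_draw" are hypothetical; cs_add_buffer,
    cs_validate, RADEON_FLUSH_ASYNC and the flush callback signature match the
    winsys interface implemented below:

        ws->cs_add_buffer(cs, tex->buf, RADEON_USAGE_READ,
                          RADEON_DOMAIN_VRAM, priority);
        if (!ws->cs_validate(cs)) {
           // Too much memory referenced: flush, then validate just this op.
           flush(flush_ctx, RADEON_FLUSH_ASYNC, NULL);
           ws->cs_add_buffer(cs, tex->buf, RADEON_USAGE_READ,
                             RADEON_DOMAIN_VRAM, priority);
           if (!ws->cs_validate(cs))
              fprintf(stderr, "driver: dropping operation\n");  // give up
        }
        emit_textured_draw(cs, tex);
*/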
#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs);
static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src);

static struct radeon_winsys_ctx *radeon_drm_ctx_create(struct radeon_winsys *ws)
{
   /* No context support here. Just return the winsys pointer
    * as the "context". */
   return (struct radeon_winsys_ctx*)ws;
}

static void radeon_drm_ctx_destroy(struct radeon_winsys_ctx *ctx)
{
   /* No context support here. */
}

static bool radeon_init_cs_context(struct radeon_cs_context *csc,
                                   struct radeon_drm_winsys *ws)
{
   int i;

   csc->fd = ws->fd;
   csc->nrelocs = 512;
   csc->relocs_bo = (struct radeon_bo_item*)
                    CALLOC(1, csc->nrelocs * sizeof(csc->relocs_bo[0]));
   if (!csc->relocs_bo) {
      return false;
   }

   csc->relocs = (struct drm_radeon_cs_reloc*)
                 CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
   if (!csc->relocs) {
      FREE(csc->relocs_bo);
      return false;
   }

   csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
   csc->chunks[0].length_dw = 0;
   csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
   csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
   csc->chunks[1].length_dw = 0;
   csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
   csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
   csc->chunks[2].length_dw = 2;
   csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

   csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
   csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
   csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

   csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;

   for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
      csc->reloc_indices_hashlist[i] = -1;
   }
   return true;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
   unsigned i;

   for (i = 0; i < csc->crelocs; i++) {
      p_atomic_dec(&csc->relocs_bo[i].bo->num_cs_references);
      radeon_bo_reference(&csc->relocs_bo[i].bo, NULL);
   }

   csc->crelocs = 0;
   csc->validated_crelocs = 0;
   csc->chunks[0].length_dw = 0;
   csc->chunks[1].length_dw = 0;

   for (i = 0; i < ARRAY_SIZE(csc->reloc_indices_hashlist); i++) {
      csc->reloc_indices_hashlist[i] = -1;
   }
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
   radeon_cs_context_cleanup(csc);
   FREE(csc->relocs_bo);
   FREE(csc->relocs);
}
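/* The DRM_RADEON_CS ioctl payload that radeon_init_cs_context() sets up above
 * looks like this (a summary of the code above, not an addition to the
 * kernel ABI):
 *
 *    csc->cs.chunks --> chunk_array[3] --> chunks[0] RADEON_CHUNK_ID_IB     -> csc->buf
 *                                          chunks[1] RADEON_CHUNK_ID_RELOCS -> csc->relocs
 *                                          chunks[2] RADEON_CHUNK_ID_FLAGS  -> csc->flags
 *
 * length_dw of chunks 0 and 1 grows as commands and relocs are added;
 * chunk 2 is always 2 dwords (the flags filled in at flush time). */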

static struct radeon_winsys_cs *
radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
                     enum ring_type ring_type,
                     void (*flush)(void *ctx, unsigned flags,
                                   struct pipe_fence_handle **fence),
                     void *flush_ctx)
{
   struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)ctx;
   struct radeon_drm_cs *cs;

   cs = CALLOC_STRUCT(radeon_drm_cs);
   if (!cs) {
      return NULL;
   }
   util_queue_fence_init(&cs->flush_completed);

   cs->ws = ws;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;

   if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
      FREE(cs);
      return NULL;
   }
   if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
      radeon_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first command buffer as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;
   cs->base.current.buf = cs->csc->buf;
   cs->base.current.max_dw = ARRAY_SIZE(cs->csc->buf);
   cs->ring_type = ring_type;

   p_atomic_inc(&ws->num_cs);
   return &cs->base;
}

#define OUT_CS(cs, value) (cs)->current.buf[(cs)->current.cdw++] = (value)

static inline void update_reloc(struct drm_radeon_cs_reloc *reloc,
                                enum radeon_bo_domain rd,
                                enum radeon_bo_domain wd,
                                unsigned priority,
                                enum radeon_bo_domain *added_domains)
{
   *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

   reloc->read_domains |= rd;
   reloc->write_domain |= wd;
   reloc->flags = MAX2(reloc->flags, priority);
}
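/* A worked example of update_reloc() above (a restatement of the code, not
 * new behavior): suppose a buffer was first added for reading from GTT and
 * is now added again for writing to VRAM. Then:
 *
 *    *added_domains = (0 | VRAM) & ~(GTT | 0) = VRAM
 *
 * so the caller accounts only the newly-referenced VRAM size, and the reloc
 * ends up with read_domains = GTT, write_domain = VRAM. */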
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
   unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
   int i = csc->reloc_indices_hashlist[hash];

   /* not found or found */
   if (i == -1 || csc->relocs_bo[i].bo == bo)
      return i;

   /* Hash collision, look for the BO in the list of relocs linearly. */
   for (i = csc->crelocs - 1; i >= 0; i--) {
      if (csc->relocs_bo[i].bo == bo) {
         /* Put this reloc in the hash list.
          * This will prevent additional hash collisions if there are
          * several consecutive lookup_buffer calls for the same buffer.
          *
          * Example: Assuming buffers A,B,C collide in the hash list,
          * the following sequence of relocs:
          *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
          * will collide here: ^ and here:   ^,
          * meaning that we should get very few collisions in the end. */
         csc->reloc_indices_hashlist[hash] = i;
         return i;
      }
   }
   return -1;
}

static unsigned radeon_add_buffer(struct radeon_drm_cs *cs,
                                  struct radeon_bo *bo,
                                  enum radeon_bo_usage usage,
                                  enum radeon_bo_domain domains,
                                  unsigned priority,
                                  enum radeon_bo_domain *added_domains)
{
   struct radeon_cs_context *csc = cs->csc;
   struct drm_radeon_cs_reloc *reloc;
   unsigned hash = bo->handle & (ARRAY_SIZE(csc->reloc_indices_hashlist)-1);
   enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
   enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
   int i = -1;

   assert(priority < 64);
   *added_domains = 0;

   i = radeon_lookup_buffer(csc, bo);

   if (i >= 0) {
      reloc = &csc->relocs[i];
      update_reloc(reloc, rd, wd, priority / 4, added_domains);
      csc->relocs_bo[i].priority_usage |= 1llu << priority;

      /* For async DMA, every add_buffer call must add a buffer to the list
       * no matter how many duplicates there are. This is due to the fact
       * the DMA CS checker doesn't use NOP packets for offset patching,
       * but always uses the i-th buffer from the list to patch the i-th
       * offset. If there are N offsets in a DMA CS, there must also be N
       * buffers in the relocation list.
       *
       * This doesn't have to be done if virtual memory is enabled,
       * because there is no offset patching with virtual memory.
       */
      if (cs->ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
         return i;
      }
   }

   /* New relocation, check if the backing array is large enough. */
   if (csc->crelocs >= csc->nrelocs) {
      uint32_t size;
      csc->nrelocs += 10;

      size = csc->nrelocs * sizeof(csc->relocs_bo[0]);
      csc->relocs_bo = realloc(csc->relocs_bo, size);

      size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
      csc->relocs = realloc(csc->relocs, size);

      csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
   }

   /* Initialize the new relocation. */
   csc->relocs_bo[csc->crelocs].bo = NULL;
   csc->relocs_bo[csc->crelocs].priority_usage = 1llu << priority;
   radeon_bo_reference(&csc->relocs_bo[csc->crelocs].bo, bo);
   p_atomic_inc(&bo->num_cs_references);
   reloc = &csc->relocs[csc->crelocs];
   reloc->handle = bo->handle;
   reloc->read_domains = rd;
   reloc->write_domain = wd;
   reloc->flags = priority / 4;

   csc->reloc_indices_hashlist[hash] = csc->crelocs;

   csc->chunks[1].length_dw += RELOC_DWORDS;

   *added_domains = rd | wd;
   return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                         struct pb_buffer *buf,
                                         enum radeon_bo_usage usage,
                                         enum radeon_bo_domain domains,
                                         enum radeon_bo_priority priority)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   enum radeon_bo_domain added_domains;
   unsigned index = radeon_add_buffer(cs, bo, usage, domains, priority,
                                      &added_domains);

   if (added_domains & RADEON_DOMAIN_VRAM)
      cs->base.used_vram += bo->base.size;
   else if (added_domains & RADEON_DOMAIN_GTT)
      cs->base.used_gart += bo->base.size;

   return index;
}

static int radeon_drm_cs_lookup_buffer(struct radeon_winsys_cs *rcs,
                                       struct pb_buffer *buf)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

   return radeon_lookup_buffer(cs->csc, (struct radeon_bo*)buf);
}
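/* Priority bookkeeping in radeon_add_buffer() above, summarized with concrete
 * numbers (no new behavior): the winsys-level priority is 0..63 and is
 * recorded per buffer as a 64-bit set of all priorities the buffer was added
 * with, while the kernel reloc only carries the coarser priority / 4.
 * For example, priority = 22 gives:
 *
 *    relocs_bo[i].priority_usage |= 1llu << 22;   -> bit 22 set in the mask
 *    reloc->flags = MAX2(reloc->flags, 22 / 4);   -> 5
 */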
static bool radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   bool status =
      cs->base.used_gart < cs->ws->info.gart_size * 0.8 &&
      cs->base.used_vram < cs->ws->info.vram_size * 0.8;

   if (status) {
      cs->csc->validated_crelocs = cs->csc->crelocs;
   } else {
      /* Remove lately-added buffers. The validation failed with them
       * and the CS is about to be flushed because of that. Keep only
       * the already-validated buffers. */
      unsigned i;

      for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
         p_atomic_dec(&cs->csc->relocs_bo[i].bo->num_cs_references);
         radeon_bo_reference(&cs->csc->relocs_bo[i].bo, NULL);
      }
      cs->csc->crelocs = cs->csc->validated_crelocs;

      /* Flush if there are any relocs. Clean up otherwise. */
      if (cs->csc->crelocs) {
         cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
      } else {
         radeon_cs_context_cleanup(cs->csc);
         cs->base.used_vram = 0;
         cs->base.used_gart = 0;

         assert(cs->base.current.cdw == 0);
         if (cs->base.current.cdw != 0) {
            fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
         }
      }
   }
   return status;
}

static bool radeon_drm_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
   assert(rcs->current.cdw <= rcs->current.max_dw);
   return rcs->current.max_dw - rcs->current.cdw >= dw;
}

static bool radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs,
                                             uint64_t vram, uint64_t gtt)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

   vram += cs->base.used_vram;
   gtt += cs->base.used_gart;

   /* Anything that goes above the VRAM size should go to GTT. */
   if (vram > cs->ws->info.vram_size)
      gtt += vram - cs->ws->info.vram_size;

   /* Now we just need to check if we have enough GTT. */
   return gtt < cs->ws->info.gart_size * 0.7;
}
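/* Worked example for the check above (a restatement, not new behavior):
 * with vram_size = 1024 MB, gart_size = 2048 MB, used_vram = 900 MB,
 * used_gart = 0 and a request of vram = 300 MB, gtt = 100 MB:
 *
 *    vram = 900 + 300 = 1200 MB   -> 176 MB over the VRAM size
 *    gtt  = 100 + 176 = 276 MB    -> 276 < 2048 * 0.7 = 1433.6, so it fits
 */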
static unsigned radeon_drm_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                              struct radeon_bo_list_item *list)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   int i;

   if (list) {
      for (i = 0; i < cs->csc->crelocs; i++) {
         list[i].bo_size = cs->csc->relocs_bo[i].bo->base.size;
         list[i].vm_address = cs->csc->relocs_bo[i].bo->va;
         list[i].priority_usage = cs->csc->relocs_bo[i].priority_usage;
      }
   }
   return cs->csc->crelocs;
}

void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
{
   struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
   unsigned i;
   int r;

   r = drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                           &csc->cs, sizeof(struct drm_radeon_cs));
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "radeon: Not enough memory for command submission.\n");
      else if (debug_get_bool_option("RADEON_DUMP_CS", false)) {
         unsigned i;

         fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
         for (i = 0; i < csc->chunks[0].length_dw; i++) {
            fprintf(stderr, "0x%08X\n", csc->buf[i]);
         }
      } else {
         fprintf(stderr, "radeon: The kernel rejected CS, "
                 "see dmesg for more information (%i).\n", r);
      }
   }

   for (i = 0; i < csc->crelocs; i++)
      p_atomic_dec(&csc->relocs_bo[i].bo->num_active_ioctls);

   radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS are completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

   /* Wait for any pending ioctl of this CS to complete. */
   if (util_queue_is_initialized(&cs->ws->cs_queue))
      util_queue_job_wait(&cs->flush_completed);
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

static int radeon_drm_cs_flush(struct radeon_winsys_cs *rcs,
                               unsigned flags,
                               struct pipe_fence_handle **fence)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_cs_context *tmp;

   switch (cs->ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      if (cs->ws->info.chip_class <= SI) {
         while (rcs->current.cdw & 7)
            OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
      } else {
         while (rcs->current.cdw & 7)
            OUT_CS(&cs->base, 0x00000000); /* NOP packet */
      }
      break;
   case RING_GFX:
      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
       * r6xx requires at least 4 dw alignment to avoid a hw bug.
       */
      if (cs->ws->info.gfx_ib_pad_with_type2) {
         while (rcs->current.cdw & 7)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
      } else {
         while (rcs->current.cdw & 7)
            OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
      }
      break;
   case RING_UVD:
      while (rcs->current.cdw & 15)
         OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
      break;
   default:
      break;
   }
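   /* Example of the padding above (restating the code): a GFX IB with
    * cdw = 13 gets 3 NOP dwords appended, so cdw = 16 and cdw & 7 == 0. */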

   if (rcs->current.cdw > rcs->current.max_dw) {
      fprintf(stderr, "radeon: command stream overflowed\n");
   }

   if (fence) {
      radeon_fence_reference(fence, NULL);
      *fence = radeon_cs_create_fence(rcs);
   }

   radeon_drm_cs_sync_flush(rcs);

   /* Swap command streams. */
   tmp = cs->csc;
   cs->csc = cs->cst;
   cs->cst = tmp;

   /* If the CS is not empty or overflowed, emit it in a separate thread. */
   if (cs->base.current.cdw && cs->base.current.cdw <= cs->base.current.max_dw &&
       !debug_get_option_noop()) {
      unsigned i, crelocs;

      crelocs = cs->cst->crelocs;

      cs->cst->chunks[0].length_dw = cs->base.current.cdw;

      for (i = 0; i < crelocs; i++) {
         /* Update the number of active asynchronous CS ioctls for the buffer. */
         p_atomic_inc(&cs->cst->relocs_bo[i].bo->num_active_ioctls);
      }

      switch (cs->ring_type) {
      case RING_DMA:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_DMA;
         cs->cst->cs.num_chunks = 3;
         if (cs->ws->info.has_virtual_memory) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
         }
         break;

      case RING_UVD:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_UVD;
         cs->cst->cs.num_chunks = 3;
         break;

      case RING_VCE:
         cs->cst->flags[0] = 0;
         cs->cst->flags[1] = RADEON_CS_RING_VCE;
         cs->cst->cs.num_chunks = 3;
         break;

      default:
      case RING_GFX:
      case RING_COMPUTE:
         cs->cst->flags[0] = RADEON_CS_KEEP_TILING_FLAGS;
         cs->cst->flags[1] = RADEON_CS_RING_GFX;
         cs->cst->cs.num_chunks = 3;

         if (cs->ws->info.has_virtual_memory) {
            cs->cst->flags[0] |= RADEON_CS_USE_VM;
            cs->cst->cs.num_chunks = 3;
         }
         if (flags & RADEON_FLUSH_END_OF_FRAME) {
            cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
            cs->cst->cs.num_chunks = 3;
         }
         if (cs->ring_type == RING_COMPUTE) {
            cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
            cs->cst->cs.num_chunks = 3;
         }
         break;
      }

      if (util_queue_is_initialized(&cs->ws->cs_queue)) {
         util_queue_add_job(&cs->ws->cs_queue, cs, &cs->flush_completed,
                            radeon_drm_cs_emit_ioctl_oneshot, NULL);
         if (!(flags & RADEON_FLUSH_ASYNC))
            radeon_drm_cs_sync_flush(rcs);
      } else {
         radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
      }
   } else {
      radeon_cs_context_cleanup(cs->cst);
   }

   /* Prepare a new CS. */
   cs->base.current.buf = cs->csc->buf;
   cs->base.current.cdw = 0;
   cs->base.used_vram = 0;
   cs->base.used_gart = 0;

   cs->ws->num_cs_flushes++;
   return 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

   radeon_drm_cs_sync_flush(rcs);
   util_queue_fence_destroy(&cs->flush_completed);
   radeon_cs_context_cleanup(&cs->csc1);
   radeon_cs_context_cleanup(&cs->csc2);
   p_atomic_dec(&cs->ws->num_cs);
   radeon_destroy_cs_context(&cs->csc1);
   radeon_destroy_cs_context(&cs->csc2);
   FREE(cs);
}
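/* A summary of the double-buffering scheme implemented by
 * radeon_drm_cs_flush() above (restating the code): "csc" is the context
 * being filled and "cst" the one being submitted. On flush the two are
 * swapped and the now-full context is handed to the cs_queue thread:
 *
 *    fill csc -> flush: swap csc/cst -> submit cst on the cs_queue thread
 *                                    -> driver keeps filling the new csc
 *
 * radeon_drm_cs_sync_flush() waits on flush_completed before the next swap,
 * so at most one submission per CS is in flight at a time. */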
static bool radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct radeon_bo *bo = (struct radeon_bo*)_buf;
   int index;

   if (!bo->num_cs_references)
      return false;

   index = radeon_lookup_buffer(cs->csc, bo);
   if (index == -1)
      return false;

   if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
      return true;
   if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
      return true;

   return false;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
   struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
   struct pb_buffer *fence;

   /* Create a fence, which is a dummy BO. */
   fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1,
                                      RADEON_DOMAIN_GTT, 0);
   /* Add the fence as a dummy relocation. */
   cs->ws->base.cs_add_buffer(rcs, fence,
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT,
                              RADEON_PRIO_FENCE);
   return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
   return ws->buffer_wait((struct pb_buffer*)fence, timeout,
                          RADEON_USAGE_READWRITE);
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
   pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.ctx_create = radeon_drm_ctx_create;
   ws->base.ctx_destroy = radeon_drm_ctx_destroy;
   ws->base.cs_create = radeon_drm_cs_create;
   ws->base.cs_destroy = radeon_drm_cs_destroy;
   ws->base.cs_add_buffer = radeon_drm_cs_add_buffer;
   ws->base.cs_lookup_buffer = radeon_drm_cs_lookup_buffer;
   ws->base.cs_validate = radeon_drm_cs_validate;
   ws->base.cs_check_space = radeon_drm_cs_check_space;
   ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
   ws->base.cs_get_buffer_list = radeon_drm_cs_get_buffer_list;
   ws->base.cs_flush = radeon_drm_cs_flush;
   ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
   ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
   ws->base.fence_wait = radeon_fence_wait;
   ws->base.fence_reference = radeon_fence_reference;
}
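/* An illustrative fence round trip from a driver's point of view
 * (hypothetical usage, not part of this file); since a fence is just the
 * dummy BO added in radeon_cs_create_fence(), fence_wait reduces to
 * buffer_wait on that BO:
 *
 *    struct pipe_fence_handle *fence = NULL;
 *    ws->base.cs_flush(cs, 0, &fence);
 *    ws->base.fence_wait(&ws->base, fence, UINT64_MAX);   // effectively: wait forever
 *    ws->base.fence_reference(&fence, NULL);              // drop the BO reference
 */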