radeon_drm_cs.c revision 67aef6dafa29fed008ea6065c425a6a92a651be9
1/* 2 * Copyright © 2008 Jérôme Glisse 3 * Copyright © 2010 Marek Olšák <maraeo@gmail.com> 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * The above copyright notice and this permission notice (including the 24 * next paragraph) shall be included in all copies or substantial portions 25 * of the Software. 26 */ 27/* 28 * Authors: 29 * Marek Olšák <maraeo@gmail.com> 30 * 31 * Based on work from libdrm_radeon by: 32 * Aapo Tahkola <aet@rasterburn.org> 33 * Nicolai Haehnle <prefect_@gmx.net> 34 * Jérôme Glisse <glisse@freedesktop.org> 35 */ 36 37/* 38 This file replaces libdrm's radeon_cs_gem with our own implemention. 39 It's optimized specifically for Radeon DRM. 40 Reloc writes and space checking are faster and simpler than their 41 counterparts in libdrm (the time complexity of all the functions 42 is O(1) in nearly all scenarios, thanks to hashing). 

   It works like this:

   cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
   also adds the size of 'buf' to the used_gart and used_vram winsys variables
   based on the domains, which are simply or'd for the accounting purposes.
   The adding is skipped if the reloc is already present in the list, but it
   accounts any newly-referenced domains.

   cs_validate is then called, which just checks:
       used_vram/gart < vram/gart_size * 0.8
   The 0.8 number allows for some memory fragmentation. If the validation
   fails, the pipe driver flushes CS and tries to do the validation again,
   i.e. it validates only that one operation. If it fails again, it drops
   the operation on the floor and prints some nasty message to stderr.
   (done in the pipe driver)

   cs_write_reloc(cs, buf) just writes a reloc that has been added using
   cs_add_reloc. The read_domain and write_domain parameters have been removed,
   because we already specify them in cs_add_reloc.
63*/ 64 65#include "radeon_drm_cs.h" 66 67#include "util/u_memory.h" 68#include "os/os_time.h" 69 70#include <stdio.h> 71#include <stdlib.h> 72#include <stdint.h> 73#include <xf86drm.h> 74 75/* 76 * this are copy from radeon_drm, once an updated libdrm is released 77 * we should bump configure.ac requirement for it and remove the following 78 * field 79 */ 80#ifndef RADEON_CHUNK_ID_FLAGS 81#define RADEON_CHUNK_ID_FLAGS 0x03 82 83/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ 84#define RADEON_CS_KEEP_TILING_FLAGS 0x01 85#endif 86 87#ifndef RADEON_CS_USE_VM 88#define RADEON_CS_USE_VM 0x02 89/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */ 90#define RADEON_CS_RING_GFX 0 91#define RADEON_CS_RING_COMPUTE 1 92#endif 93 94#ifndef RADEON_CS_RING_DMA 95#define RADEON_CS_RING_DMA 2 96#endif 97 98#ifndef RADEON_CS_RING_UVD 99#define RADEON_CS_RING_UVD 3 100#endif 101 102#ifndef RADEON_CS_RING_VCE 103#define RADEON_CS_RING_VCE 4 104#endif 105 106#ifndef RADEON_CS_END_OF_FRAME 107#define RADEON_CS_END_OF_FRAME 0x04 108#endif 109 110 111#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) 112 113static boolean radeon_init_cs_context(struct radeon_cs_context *csc, 114 struct radeon_drm_winsys *ws) 115{ 116 csc->fd = ws->fd; 117 csc->nrelocs = 512; 118 csc->relocs_bo = (struct radeon_bo**) 119 CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); 120 if (!csc->relocs_bo) { 121 return FALSE; 122 } 123 124 csc->relocs = (struct drm_radeon_cs_reloc*) 125 CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc)); 126 if (!csc->relocs) { 127 FREE(csc->relocs_bo); 128 return FALSE; 129 } 130 131 csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; 132 csc->chunks[0].length_dw = 0; 133 csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; 134 csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; 135 csc->chunks[1].length_dw = 0; 136 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; 137 
csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; 138 csc->chunks[2].length_dw = 2; 139 csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags; 140 141 csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; 142 csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; 143 csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; 144 145 csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; 146 return TRUE; 147} 148 149static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) 150{ 151 unsigned i; 152 153 for (i = 0; i < csc->crelocs; i++) { 154 p_atomic_dec(&csc->relocs_bo[i]->num_cs_references); 155 radeon_bo_reference(&csc->relocs_bo[i], NULL); 156 } 157 158 csc->crelocs = 0; 159 csc->validated_crelocs = 0; 160 csc->chunks[0].length_dw = 0; 161 csc->chunks[1].length_dw = 0; 162 csc->used_gart = 0; 163 csc->used_vram = 0; 164 memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added)); 165} 166 167static void radeon_destroy_cs_context(struct radeon_cs_context *csc) 168{ 169 radeon_cs_context_cleanup(csc); 170 FREE(csc->relocs_bo); 171 FREE(csc->relocs); 172} 173 174 175static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, 176 enum ring_type ring_type, 177 struct radeon_winsys_cs_handle *trace_buf) 178{ 179 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); 180 struct radeon_drm_cs *cs; 181 182 cs = CALLOC_STRUCT(radeon_drm_cs); 183 if (!cs) { 184 return NULL; 185 } 186 pipe_semaphore_init(&cs->flush_completed, 1); 187 188 cs->ws = ws; 189 cs->trace_buf = (struct radeon_bo*)trace_buf; 190 191 if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { 192 FREE(cs); 193 return NULL; 194 } 195 if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { 196 radeon_destroy_cs_context(&cs->csc1); 197 FREE(cs); 198 return NULL; 199 } 200 201 /* Set the first command buffer as current. 
*/ 202 cs->csc = &cs->csc1; 203 cs->cst = &cs->csc2; 204 cs->base.buf = cs->csc->buf; 205 cs->base.ring_type = ring_type; 206 207 p_atomic_inc(&ws->num_cs); 208 return &cs->base; 209} 210 211#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) 212 213static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc, 214 enum radeon_bo_usage usage, 215 enum radeon_bo_domain new_domain, 216 enum radeon_bo_domain *added_domains) 217{ 218 enum radeon_bo_domain current = reloc->read_domains | reloc->write_domain; 219 enum radeon_bo_domain final; 220 221 /* If there is at least one command which wants the buffer to be in VRAM 222 * only, keep it in VRAM. */ 223 if ((current & new_domain) == RADEON_DOMAIN_VRAM) 224 final = RADEON_DOMAIN_VRAM; 225 else 226 final = current | new_domain; 227 228 *added_domains = final & ~current; 229 230 /* If we have at least one write usage... */ 231 if (usage & RADEON_USAGE_WRITE || reloc->write_domain) { 232 reloc->write_domain = final; 233 reloc->read_domains = 0; 234 } else { 235 /* write_domain is zero */ 236 reloc->read_domains = final; 237 } 238} 239 240int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) 241{ 242 struct drm_radeon_cs_reloc *reloc; 243 unsigned i; 244 unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); 245 246 if (csc->is_handle_added[hash]) { 247 i = csc->reloc_indices_hashlist[hash]; 248 reloc = &csc->relocs[i]; 249 if (reloc->handle == bo->handle) { 250 return i; 251 } 252 253 /* Hash collision, look for the BO in the list of relocs linearly. */ 254 for (i = csc->crelocs; i != 0;) { 255 --i; 256 reloc = &csc->relocs[i]; 257 if (reloc->handle == bo->handle) { 258 /* Put this reloc in the hash list. 259 * This will prevent additional hash collisions if there are 260 * several consecutive get_reloc calls for the same buffer. 
261 * 262 * Example: Assuming buffers A,B,C collide in the hash list, 263 * the following sequence of relocs: 264 * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC 265 * will collide here: ^ and here: ^, 266 * meaning that we should get very few collisions in the end. */ 267 csc->reloc_indices_hashlist[hash] = i; 268 /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ 269 return i; 270 } 271 } 272 } 273 274 return -1; 275} 276 277static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, 278 struct radeon_bo *bo, 279 enum radeon_bo_usage usage, 280 enum radeon_bo_domain domains, 281 enum radeon_bo_domain *added_domains) 282{ 283 struct radeon_cs_context *csc = cs->csc; 284 struct drm_radeon_cs_reloc *reloc; 285 unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); 286 bool update_hash = TRUE; 287 int i; 288 289 *added_domains = 0; 290 if (csc->is_handle_added[hash]) { 291 i = csc->reloc_indices_hashlist[hash]; 292 reloc = &csc->relocs[i]; 293 if (reloc->handle != bo->handle) { 294 /* Hash collision, look for the BO in the list of relocs linearly. */ 295 for (i = csc->crelocs - 1; i >= 0; i--) { 296 reloc = &csc->relocs[i]; 297 if (reloc->handle == bo->handle) { 298 /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ 299 break; 300 } 301 } 302 } 303 304 if (i >= 0) { 305 /* On DMA ring we need to emit as many relocation as there is use of the bo 306 * thus each time this function is call we should grow add again the bo to 307 * the relocation buffer 308 * 309 * Do not update the hash table if it's dma ring, so that first hash always point 310 * to first bo relocation which will the one used by the kernel. Following relocation 311 * will be ignore by the kernel memory placement (but still use by the kernel to 312 * update the cmd stream with proper buffer offset). 
313 */ 314 update_hash = FALSE; 315 update_reloc_domains(reloc, usage, domains, added_domains); 316 if (cs->base.ring_type != RING_DMA) { 317 csc->reloc_indices_hashlist[hash] = i; 318 return i; 319 } 320 } 321 } 322 323 /* New relocation, check if the backing array is large enough. */ 324 if (csc->crelocs >= csc->nrelocs) { 325 uint32_t size; 326 csc->nrelocs += 10; 327 328 size = csc->nrelocs * sizeof(struct radeon_bo*); 329 csc->relocs_bo = realloc(csc->relocs_bo, size); 330 331 size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc); 332 csc->relocs = realloc(csc->relocs, size); 333 334 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; 335 } 336 337 /* Initialize the new relocation. */ 338 csc->relocs_bo[csc->crelocs] = NULL; 339 radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo); 340 p_atomic_inc(&bo->num_cs_references); 341 reloc = &csc->relocs[csc->crelocs]; 342 reloc->handle = bo->handle; 343 if (usage & RADEON_USAGE_WRITE) 344 reloc->write_domain = domains; 345 else 346 reloc->read_domains = domains; 347 reloc->flags = 0; 348 349 csc->is_handle_added[hash] = TRUE; 350 if (update_hash) { 351 csc->reloc_indices_hashlist[hash] = csc->crelocs; 352 } 353 354 csc->chunks[1].length_dw += RELOC_DWORDS; 355 356 *added_domains = domains; 357 return csc->crelocs++; 358} 359 360static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, 361 struct radeon_winsys_cs_handle *buf, 362 enum radeon_bo_usage usage, 363 enum radeon_bo_domain domains) 364{ 365 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 366 struct radeon_bo *bo = (struct radeon_bo*)buf; 367 enum radeon_bo_domain added_domains; 368 unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains); 369 370 if (added_domains & RADEON_DOMAIN_GTT) 371 cs->csc->used_gart += bo->base.size; 372 if (added_domains & RADEON_DOMAIN_VRAM) 373 cs->csc->used_vram += bo->base.size; 374 375 return index; 376} 377 378static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) 379{ 
380 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 381 boolean status = 382 cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && 383 cs->csc->used_vram < cs->ws->info.vram_size * 0.8; 384 385 if (status) { 386 cs->csc->validated_crelocs = cs->csc->crelocs; 387 } else { 388 /* Remove lately-added relocations. The validation failed with them 389 * and the CS is about to be flushed because of that. Keep only 390 * the already-validated relocations. */ 391 unsigned i; 392 393 for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { 394 p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); 395 radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); 396 } 397 cs->csc->crelocs = cs->csc->validated_crelocs; 398 399 /* Flush if there are any relocs. Clean up otherwise. */ 400 if (cs->csc->crelocs) { 401 cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC); 402 } else { 403 radeon_cs_context_cleanup(cs->csc); 404 405 assert(cs->base.cdw == 0); 406 if (cs->base.cdw != 0) { 407 fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); 408 } 409 } 410 } 411 return status; 412} 413 414static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt) 415{ 416 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 417 boolean status = 418 (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 && 419 (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7; 420 421 return status; 422} 423 424static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, 425 struct radeon_winsys_cs_handle *buf) 426{ 427 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 428 struct radeon_bo *bo = (struct radeon_bo*)buf; 429 unsigned index = radeon_get_reloc(cs->csc, bo); 430 431 if (index == -1) { 432 fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__); 433 return; 434 } 435 436 OUT_CS(&cs->base, 0xc0001000); 437 OUT_CS(&cs->base, index * RELOC_DWORDS); 438} 439 440void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct 
radeon_cs_context *csc) 441{ 442 unsigned i; 443 444 if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS, 445 &csc->cs, sizeof(struct drm_radeon_cs))) { 446 if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { 447 unsigned i; 448 449 fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); 450 for (i = 0; i < csc->chunks[0].length_dw; i++) { 451 fprintf(stderr, "0x%08X\n", csc->buf[i]); 452 } 453 } else { 454 fprintf(stderr, "radeon: The kernel rejected CS, " 455 "see dmesg for more information.\n"); 456 } 457 } 458 459 if (cs->trace_buf) { 460 radeon_dump_cs_on_lockup(cs, csc); 461 } 462 463 for (i = 0; i < csc->crelocs; i++) 464 p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); 465 466 radeon_cs_context_cleanup(csc); 467} 468 469/* 470 * Make sure previous submission of this cs are completed 471 */ 472void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs) 473{ 474 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 475 476 /* Wait for any pending ioctl to complete. */ 477 if (cs->ws->thread) { 478 pipe_semaphore_wait(&cs->flush_completed); 479 pipe_semaphore_signal(&cs->flush_completed); 480 } 481} 482 483DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE) 484 485static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id) 486{ 487 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 488 struct radeon_cs_context *tmp; 489 490 switch (cs->base.ring_type) { 491 case RING_DMA: 492 /* pad DMA ring to 8 DWs */ 493 if (cs->ws->info.chip_class <= SI) { 494 while (rcs->cdw & 7) 495 OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ 496 } else { 497 while (rcs->cdw & 7) 498 OUT_CS(&cs->base, 0x00000000); /* NOP packet */ 499 } 500 break; 501 case RING_GFX: 502 /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements 503 * r6xx, requires at least 4 dw alignment to avoid a hw bug. 
504 */ 505 if (cs->ws->info.chip_class <= SI) { 506 while (rcs->cdw & 7) 507 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ 508 } else { 509 while (rcs->cdw & 7) 510 OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ 511 } 512 break; 513 case RING_UVD: 514 while (rcs->cdw & 15) 515 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ 516 break; 517 default: 518 break; 519 } 520 521 if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { 522 fprintf(stderr, "radeon: command stream overflowed\n"); 523 } 524 525 radeon_drm_cs_sync_flush(rcs); 526 527 /* Flip command streams. */ 528 tmp = cs->csc; 529 cs->csc = cs->cst; 530 cs->cst = tmp; 531 532 cs->cst->cs_trace_id = cs_trace_id; 533 534 /* If the CS is not empty or overflowed, emit it in a separate thread. */ 535 if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { 536 unsigned i, crelocs = cs->cst->crelocs; 537 538 cs->cst->chunks[0].length_dw = cs->base.cdw; 539 540 for (i = 0; i < crelocs; i++) { 541 /* Update the number of active asynchronous CS ioctls for the buffer. 
*/ 542 p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); 543 } 544 545 switch (cs->base.ring_type) { 546 case RING_DMA: 547 cs->cst->flags[0] = 0; 548 cs->cst->flags[1] = RADEON_CS_RING_DMA; 549 cs->cst->cs.num_chunks = 3; 550 if (cs->ws->info.r600_virtual_address) { 551 cs->cst->flags[0] |= RADEON_CS_USE_VM; 552 } 553 break; 554 555 case RING_UVD: 556 cs->cst->flags[0] = 0; 557 cs->cst->flags[1] = RADEON_CS_RING_UVD; 558 cs->cst->cs.num_chunks = 3; 559 break; 560 561 case RING_VCE: 562 cs->cst->flags[0] = 0; 563 cs->cst->flags[1] = RADEON_CS_RING_VCE; 564 cs->cst->cs.num_chunks = 3; 565 break; 566 567 default: 568 case RING_GFX: 569 cs->cst->flags[0] = 0; 570 cs->cst->flags[1] = RADEON_CS_RING_GFX; 571 cs->cst->cs.num_chunks = 2; 572 if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { 573 cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; 574 cs->cst->cs.num_chunks = 3; 575 } 576 if (cs->ws->info.r600_virtual_address) { 577 cs->cst->flags[0] |= RADEON_CS_USE_VM; 578 cs->cst->cs.num_chunks = 3; 579 } 580 if (flags & RADEON_FLUSH_END_OF_FRAME) { 581 cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; 582 cs->cst->cs.num_chunks = 3; 583 } 584 if (flags & RADEON_FLUSH_COMPUTE) { 585 cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; 586 cs->cst->cs.num_chunks = 3; 587 } 588 break; 589 } 590 591 if (cs->ws->thread) { 592 pipe_semaphore_wait(&cs->flush_completed); 593 radeon_drm_ws_queue_cs(cs->ws, cs); 594 if (!(flags & RADEON_FLUSH_ASYNC)) 595 radeon_drm_cs_sync_flush(rcs); 596 } else { 597 radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); 598 } 599 } else { 600 radeon_cs_context_cleanup(cs->cst); 601 } 602 603 /* Prepare a new CS. 
*/ 604 cs->base.buf = cs->csc->buf; 605 cs->base.cdw = 0; 606} 607 608static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) 609{ 610 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 611 612 radeon_drm_cs_sync_flush(rcs); 613 pipe_semaphore_destroy(&cs->flush_completed); 614 radeon_cs_context_cleanup(&cs->csc1); 615 radeon_cs_context_cleanup(&cs->csc2); 616 p_atomic_dec(&cs->ws->num_cs); 617 radeon_destroy_cs_context(&cs->csc1); 618 radeon_destroy_cs_context(&cs->csc2); 619 FREE(cs); 620} 621 622static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs, 623 void (*flush)(void *ctx, unsigned flags), 624 void *user) 625{ 626 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 627 628 cs->flush_cs = flush; 629 cs->flush_data = user; 630} 631 632static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, 633 struct radeon_winsys_cs_handle *_buf, 634 enum radeon_bo_usage usage) 635{ 636 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 637 struct radeon_bo *bo = (struct radeon_bo*)_buf; 638 int index; 639 640 if (!bo->num_cs_references) 641 return FALSE; 642 643 index = radeon_get_reloc(cs->csc, bo); 644 if (index == -1) 645 return FALSE; 646 647 if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain) 648 return TRUE; 649 if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains) 650 return TRUE; 651 652 return FALSE; 653} 654 655/* FENCES */ 656 657static struct pipe_fence_handle * 658radeon_cs_create_fence(struct radeon_winsys_cs *rcs) 659{ 660 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 661 struct pb_buffer *fence; 662 663 /* Create a fence, which is a dummy BO. */ 664 fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE, 665 RADEON_DOMAIN_GTT); 666 /* Add the fence as a dummy relocation. 
*/ 667 cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), 668 RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT); 669 return (struct pipe_fence_handle*)fence; 670} 671 672static bool radeon_fence_wait(struct radeon_winsys *ws, 673 struct pipe_fence_handle *fence, 674 uint64_t timeout) 675{ 676 struct pb_buffer *rfence = (struct pb_buffer*)fence; 677 678 if (timeout == 0) 679 return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); 680 681 if (timeout != PIPE_TIMEOUT_INFINITE) { 682 int64_t start_time = os_time_get(); 683 684 /* Convert to microseconds. */ 685 timeout /= 1000; 686 687 /* Wait in a loop. */ 688 while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { 689 if (os_time_get() - start_time >= timeout) { 690 return FALSE; 691 } 692 os_time_sleep(10); 693 } 694 return TRUE; 695 } 696 697 ws->buffer_wait(rfence, RADEON_USAGE_READWRITE); 698 return TRUE; 699} 700 701static void radeon_fence_reference(struct pipe_fence_handle **dst, 702 struct pipe_fence_handle *src) 703{ 704 pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); 705} 706 707void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) 708{ 709 ws->base.cs_create = radeon_drm_cs_create; 710 ws->base.cs_destroy = radeon_drm_cs_destroy; 711 ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; 712 ws->base.cs_validate = radeon_drm_cs_validate; 713 ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; 714 ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; 715 ws->base.cs_flush = radeon_drm_cs_flush; 716 ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; 717 ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; 718 ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; 719 ws->base.cs_create_fence = radeon_cs_create_fence; 720 ws->base.fence_wait = radeon_fence_wait; 721 ws->base.fence_reference = radeon_fence_reference; 722} 723