/* radeon_drm_cs.c revision 07c65b85eada8dd34019763b6e82ed4257a9b4a6 */
1/* 2 * Copyright © 2008 Jérôme Glisse 3 * Copyright © 2010 Marek Olšák <maraeo@gmail.com> 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS 18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * The above copyright notice and this permission notice (including the 24 * next paragraph) shall be included in all copies or substantial portions 25 * of the Software. 26 */ 27/* 28 * Authors: 29 * Marek Olšák <maraeo@gmail.com> 30 * 31 * Based on work from libdrm_radeon by: 32 * Aapo Tahkola <aet@rasterburn.org> 33 * Nicolai Haehnle <prefect_@gmx.net> 34 * Jérôme Glisse <glisse@freedesktop.org> 35 */ 36 37/* 38 This file replaces libdrm's radeon_cs_gem with our own implemention. 39 It's optimized specifically for Radeon DRM. 40 Reloc writes and space checking are faster and simpler than their 41 counterparts in libdrm (the time complexity of all the functions 42 is O(1) in nearly all scenarios, thanks to hashing). 
43 44 It works like this: 45 46 cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and 47 also adds the size of 'buf' to the used_gart and used_vram winsys variables 48 based on the domains, which are simply or'd for the accounting purposes. 49 The adding is skipped if the reloc is already present in the list, but it 50 accounts any newly-referenced domains. 51 52 cs_validate is then called, which just checks: 53 used_vram/gart < vram/gart_size * 0.8 54 The 0.8 number allows for some memory fragmentation. If the validation 55 fails, the pipe driver flushes CS and tries do the validation again, 56 i.e. it validates only that one operation. If it fails again, it drops 57 the operation on the floor and prints some nasty message to stderr. 58 (done in the pipe driver) 59 60 cs_write_reloc(cs, buf) just writes a reloc that has been added using 61 cs_add_reloc. The read_domain and write_domain parameters have been removed, 62 because we already specify them in cs_add_reloc. 
63*/ 64 65#include "radeon_drm_cs.h" 66 67#include "util/u_memory.h" 68#include "os/os_time.h" 69 70#include <stdio.h> 71#include <stdlib.h> 72#include <stdint.h> 73#include <xf86drm.h> 74 75 76#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) 77 78static struct pipe_fence_handle * 79radeon_cs_create_fence(struct radeon_winsys_cs *rcs); 80static void radeon_fence_reference(struct pipe_fence_handle **dst, 81 struct pipe_fence_handle *src); 82 83static boolean radeon_init_cs_context(struct radeon_cs_context *csc, 84 struct radeon_drm_winsys *ws) 85{ 86 int i; 87 88 csc->fd = ws->fd; 89 csc->nrelocs = 512; 90 csc->relocs_bo = (struct radeon_bo**) 91 CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); 92 if (!csc->relocs_bo) { 93 return FALSE; 94 } 95 96 csc->relocs = (struct drm_radeon_cs_reloc*) 97 CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc)); 98 if (!csc->relocs) { 99 FREE(csc->relocs_bo); 100 return FALSE; 101 } 102 103 csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; 104 csc->chunks[0].length_dw = 0; 105 csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf; 106 csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; 107 csc->chunks[1].length_dw = 0; 108 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; 109 csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; 110 csc->chunks[2].length_dw = 2; 111 csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags; 112 113 csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0]; 114 csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1]; 115 csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2]; 116 117 csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array; 118 119 for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) { 120 csc->reloc_indices_hashlist[i] = -1; 121 } 122 return TRUE; 123} 124 125static void radeon_cs_context_cleanup(struct radeon_cs_context *csc) 126{ 127 unsigned i; 128 129 for (i = 0; i < csc->crelocs; i++) { 130 
p_atomic_dec(&csc->relocs_bo[i]->num_cs_references); 131 radeon_bo_reference(&csc->relocs_bo[i], NULL); 132 } 133 134 csc->crelocs = 0; 135 csc->validated_crelocs = 0; 136 csc->chunks[0].length_dw = 0; 137 csc->chunks[1].length_dw = 0; 138 csc->used_gart = 0; 139 csc->used_vram = 0; 140 141 for (i = 0; i < Elements(csc->reloc_indices_hashlist); i++) { 142 csc->reloc_indices_hashlist[i] = -1; 143 } 144} 145 146static void radeon_destroy_cs_context(struct radeon_cs_context *csc) 147{ 148 radeon_cs_context_cleanup(csc); 149 FREE(csc->relocs_bo); 150 FREE(csc->relocs); 151} 152 153 154static struct radeon_winsys_cs * 155radeon_drm_cs_create(struct radeon_winsys *rws, 156 enum ring_type ring_type, 157 void (*flush)(void *ctx, unsigned flags, 158 struct pipe_fence_handle **fence), 159 void *flush_ctx, 160 struct radeon_winsys_cs_handle *trace_buf) 161{ 162 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); 163 struct radeon_drm_cs *cs; 164 165 cs = CALLOC_STRUCT(radeon_drm_cs); 166 if (!cs) { 167 return NULL; 168 } 169 pipe_semaphore_init(&cs->flush_completed, 1); 170 171 cs->ws = ws; 172 cs->flush_cs = flush; 173 cs->flush_data = flush_ctx; 174 cs->trace_buf = (struct radeon_bo*)trace_buf; 175 176 if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { 177 FREE(cs); 178 return NULL; 179 } 180 if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { 181 radeon_destroy_cs_context(&cs->csc1); 182 FREE(cs); 183 return NULL; 184 } 185 186 /* Set the first command buffer as current. 
*/ 187 cs->csc = &cs->csc1; 188 cs->cst = &cs->csc2; 189 cs->base.buf = cs->csc->buf; 190 cs->base.ring_type = ring_type; 191 192 p_atomic_inc(&ws->num_cs); 193 return &cs->base; 194} 195 196#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) 197 198static INLINE void update_reloc(struct drm_radeon_cs_reloc *reloc, 199 enum radeon_bo_domain rd, 200 enum radeon_bo_domain wd, 201 unsigned priority, 202 enum radeon_bo_domain *added_domains) 203{ 204 *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); 205 206 reloc->read_domains |= rd; 207 reloc->write_domain |= wd; 208 reloc->flags = MAX2(reloc->flags, priority); 209} 210 211int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) 212{ 213 unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1); 214 int i = csc->reloc_indices_hashlist[hash]; 215 216 /* not found or found */ 217 if (i == -1 || csc->relocs_bo[i] == bo) 218 return i; 219 220 /* Hash collision, look for the BO in the list of relocs linearly. */ 221 for (i = csc->crelocs - 1; i >= 0; i--) { 222 if (csc->relocs_bo[i] == bo) { 223 /* Put this reloc in the hash list. 224 * This will prevent additional hash collisions if there are 225 * several consecutive get_reloc calls for the same buffer. 226 * 227 * Example: Assuming buffers A,B,C collide in the hash list, 228 * the following sequence of relocs: 229 * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC 230 * will collide here: ^ and here: ^, 231 * meaning that we should get very few collisions in the end. 
*/ 232 csc->reloc_indices_hashlist[hash] = i; 233 return i; 234 } 235 } 236 return -1; 237} 238 239static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, 240 struct radeon_bo *bo, 241 enum radeon_bo_usage usage, 242 enum radeon_bo_domain domains, 243 unsigned priority, 244 enum radeon_bo_domain *added_domains) 245{ 246 struct radeon_cs_context *csc = cs->csc; 247 struct drm_radeon_cs_reloc *reloc; 248 unsigned hash = bo->handle & (Elements(csc->reloc_indices_hashlist)-1); 249 enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0; 250 enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; 251 int i = -1; 252 253 priority = MIN2(priority, 15); 254 *added_domains = 0; 255 256 i = radeon_get_reloc(csc, bo); 257 258 if (i >= 0) { 259 reloc = &csc->relocs[i]; 260 update_reloc(reloc, rd, wd, priority, added_domains); 261 262 /* For async DMA, every add_reloc call must add a buffer to the list 263 * no matter how many duplicates there are. This is due to the fact 264 * the DMA CS checker doesn't use NOP packets for offset patching, 265 * but always uses the i-th buffer from the list to patch the i-th 266 * offset. If there are N offsets in a DMA CS, there must also be N 267 * buffers in the relocation list. 268 * 269 * This doesn't have to be done if virtual memory is enabled, 270 * because there is no offset patching with virtual memory. 271 */ 272 if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) { 273 return i; 274 } 275 } 276 277 /* New relocation, check if the backing array is large enough. */ 278 if (csc->crelocs >= csc->nrelocs) { 279 uint32_t size; 280 csc->nrelocs += 10; 281 282 size = csc->nrelocs * sizeof(struct radeon_bo*); 283 csc->relocs_bo = realloc(csc->relocs_bo, size); 284 285 size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc); 286 csc->relocs = realloc(csc->relocs, size); 287 288 csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs; 289 } 290 291 /* Initialize the new relocation. 
*/ 292 csc->relocs_bo[csc->crelocs] = NULL; 293 radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo); 294 p_atomic_inc(&bo->num_cs_references); 295 reloc = &csc->relocs[csc->crelocs]; 296 reloc->handle = bo->handle; 297 reloc->read_domains = rd; 298 reloc->write_domain = wd; 299 reloc->flags = priority; 300 301 csc->reloc_indices_hashlist[hash] = csc->crelocs; 302 303 csc->chunks[1].length_dw += RELOC_DWORDS; 304 305 *added_domains = rd | wd; 306 return csc->crelocs++; 307} 308 309static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, 310 struct radeon_winsys_cs_handle *buf, 311 enum radeon_bo_usage usage, 312 enum radeon_bo_domain domains, 313 enum radeon_bo_priority priority) 314{ 315 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 316 struct radeon_bo *bo = (struct radeon_bo*)buf; 317 enum radeon_bo_domain added_domains; 318 unsigned index = radeon_add_reloc(cs, bo, usage, domains, priority, &added_domains); 319 320 if (added_domains & RADEON_DOMAIN_GTT) 321 cs->csc->used_gart += bo->base.size; 322 if (added_domains & RADEON_DOMAIN_VRAM) 323 cs->csc->used_vram += bo->base.size; 324 325 return index; 326} 327 328static int radeon_drm_cs_get_reloc(struct radeon_winsys_cs *rcs, 329 struct radeon_winsys_cs_handle *buf) 330{ 331 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 332 333 return radeon_get_reloc(cs->csc, (struct radeon_bo*)buf); 334} 335 336static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs) 337{ 338 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 339 boolean status = 340 cs->csc->used_gart < cs->ws->info.gart_size * 0.8 && 341 cs->csc->used_vram < cs->ws->info.vram_size * 0.8; 342 343 if (status) { 344 cs->csc->validated_crelocs = cs->csc->crelocs; 345 } else { 346 /* Remove lately-added relocations. The validation failed with them 347 * and the CS is about to be flushed because of that. Keep only 348 * the already-validated relocations. 
*/ 349 unsigned i; 350 351 for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) { 352 p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references); 353 radeon_bo_reference(&cs->csc->relocs_bo[i], NULL); 354 } 355 cs->csc->crelocs = cs->csc->validated_crelocs; 356 357 /* Flush if there are any relocs. Clean up otherwise. */ 358 if (cs->csc->crelocs) { 359 cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL); 360 } else { 361 radeon_cs_context_cleanup(cs->csc); 362 363 assert(cs->base.cdw == 0); 364 if (cs->base.cdw != 0) { 365 fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__); 366 } 367 } 368 } 369 return status; 370} 371 372static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt) 373{ 374 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 375 boolean status = 376 (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 && 377 (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7; 378 379 return status; 380} 381 382void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc) 383{ 384 unsigned i; 385 386 if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS, 387 &csc->cs, sizeof(struct drm_radeon_cs))) { 388 if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { 389 unsigned i; 390 391 fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); 392 for (i = 0; i < csc->chunks[0].length_dw; i++) { 393 fprintf(stderr, "0x%08X\n", csc->buf[i]); 394 } 395 } else { 396 fprintf(stderr, "radeon: The kernel rejected CS, " 397 "see dmesg for more information.\n"); 398 } 399 } 400 401 if (cs->trace_buf) { 402 radeon_dump_cs_on_lockup(cs, csc); 403 } 404 405 for (i = 0; i < csc->crelocs; i++) 406 p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls); 407 408 radeon_cs_context_cleanup(csc); 409} 410 411/* 412 * Make sure previous submission of this cs are completed 413 */ 414void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs) 415{ 416 struct radeon_drm_cs *cs = 
radeon_drm_cs(rcs); 417 418 /* Wait for any pending ioctl to complete. */ 419 if (cs->ws->thread) { 420 pipe_semaphore_wait(&cs->flush_completed); 421 pipe_semaphore_signal(&cs->flush_completed); 422 } 423} 424 425DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE) 426 427static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, 428 unsigned flags, 429 struct pipe_fence_handle **fence, 430 uint32_t cs_trace_id) 431{ 432 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 433 struct radeon_cs_context *tmp; 434 435 switch (cs->base.ring_type) { 436 case RING_DMA: 437 /* pad DMA ring to 8 DWs */ 438 if (cs->ws->info.chip_class <= SI) { 439 while (rcs->cdw & 7) 440 OUT_CS(&cs->base, 0xf0000000); /* NOP packet */ 441 } else { 442 while (rcs->cdw & 7) 443 OUT_CS(&cs->base, 0x00000000); /* NOP packet */ 444 } 445 break; 446 case RING_GFX: 447 /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements 448 * r6xx, requires at least 4 dw alignment to avoid a hw bug. 449 */ 450 if (cs->ws->info.chip_class <= SI) { 451 while (rcs->cdw & 7) 452 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ 453 } else { 454 while (rcs->cdw & 7) 455 OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */ 456 } 457 break; 458 case RING_UVD: 459 while (rcs->cdw & 15) 460 OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */ 461 break; 462 default: 463 break; 464 } 465 466 if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) { 467 fprintf(stderr, "radeon: command stream overflowed\n"); 468 } 469 470 if (fence) { 471 radeon_fence_reference(fence, NULL); 472 *fence = radeon_cs_create_fence(rcs); 473 } 474 475 radeon_drm_cs_sync_flush(rcs); 476 477 /* Swap command streams. */ 478 tmp = cs->csc; 479 cs->csc = cs->cst; 480 cs->cst = tmp; 481 482 cs->cst->cs_trace_id = cs_trace_id; 483 484 /* If the CS is not empty or overflowed, emit it in a separate thread. 
*/ 485 if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { 486 unsigned i, crelocs; 487 488 crelocs = cs->cst->crelocs; 489 490 cs->cst->chunks[0].length_dw = cs->base.cdw; 491 492 for (i = 0; i < crelocs; i++) { 493 /* Update the number of active asynchronous CS ioctls for the buffer. */ 494 p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); 495 } 496 497 switch (cs->base.ring_type) { 498 case RING_DMA: 499 cs->cst->flags[0] = 0; 500 cs->cst->flags[1] = RADEON_CS_RING_DMA; 501 cs->cst->cs.num_chunks = 3; 502 if (cs->ws->info.r600_virtual_address) { 503 cs->cst->flags[0] |= RADEON_CS_USE_VM; 504 } 505 break; 506 507 case RING_UVD: 508 cs->cst->flags[0] = 0; 509 cs->cst->flags[1] = RADEON_CS_RING_UVD; 510 cs->cst->cs.num_chunks = 3; 511 break; 512 513 case RING_VCE: 514 cs->cst->flags[0] = 0; 515 cs->cst->flags[1] = RADEON_CS_RING_VCE; 516 cs->cst->cs.num_chunks = 3; 517 break; 518 519 default: 520 case RING_GFX: 521 cs->cst->flags[0] = 0; 522 cs->cst->flags[1] = RADEON_CS_RING_GFX; 523 cs->cst->cs.num_chunks = 2; 524 if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { 525 cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; 526 cs->cst->cs.num_chunks = 3; 527 } 528 if (cs->ws->info.r600_virtual_address) { 529 cs->cst->flags[0] |= RADEON_CS_USE_VM; 530 cs->cst->cs.num_chunks = 3; 531 } 532 if (flags & RADEON_FLUSH_END_OF_FRAME) { 533 cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; 534 cs->cst->cs.num_chunks = 3; 535 } 536 if (flags & RADEON_FLUSH_COMPUTE) { 537 cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; 538 cs->cst->cs.num_chunks = 3; 539 } 540 break; 541 } 542 543 if (cs->ws->thread) { 544 pipe_semaphore_wait(&cs->flush_completed); 545 radeon_drm_ws_queue_cs(cs->ws, cs); 546 if (!(flags & RADEON_FLUSH_ASYNC)) 547 radeon_drm_cs_sync_flush(rcs); 548 } else { 549 radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst); 550 } 551 } else { 552 radeon_cs_context_cleanup(cs->cst); 553 } 554 555 /* Prepare a new CS. 
*/ 556 cs->base.buf = cs->csc->buf; 557 cs->base.cdw = 0; 558 559 cs->ws->num_cs_flushes++; 560} 561 562static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) 563{ 564 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 565 566 radeon_drm_cs_sync_flush(rcs); 567 pipe_semaphore_destroy(&cs->flush_completed); 568 radeon_cs_context_cleanup(&cs->csc1); 569 radeon_cs_context_cleanup(&cs->csc2); 570 p_atomic_dec(&cs->ws->num_cs); 571 radeon_destroy_cs_context(&cs->csc1); 572 radeon_destroy_cs_context(&cs->csc2); 573 FREE(cs); 574} 575 576static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs, 577 struct radeon_winsys_cs_handle *_buf, 578 enum radeon_bo_usage usage) 579{ 580 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 581 struct radeon_bo *bo = (struct radeon_bo*)_buf; 582 int index; 583 584 if (!bo->num_cs_references) 585 return FALSE; 586 587 index = radeon_get_reloc(cs->csc, bo); 588 if (index == -1) 589 return FALSE; 590 591 if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain) 592 return TRUE; 593 if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains) 594 return TRUE; 595 596 return FALSE; 597} 598 599/* FENCES */ 600 601static struct pipe_fence_handle * 602radeon_cs_create_fence(struct radeon_winsys_cs *rcs) 603{ 604 struct radeon_drm_cs *cs = radeon_drm_cs(rcs); 605 struct pb_buffer *fence; 606 607 /* Create a fence, which is a dummy BO. */ 608 fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE, 609 RADEON_DOMAIN_GTT, 0); 610 /* Add the fence as a dummy relocation. 
*/ 611 cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), 612 RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, 613 RADEON_PRIO_MIN); 614 return (struct pipe_fence_handle*)fence; 615} 616 617static bool radeon_fence_wait(struct radeon_winsys *ws, 618 struct pipe_fence_handle *fence, 619 uint64_t timeout) 620{ 621 struct pb_buffer *rfence = (struct pb_buffer*)fence; 622 623 if (timeout == 0) 624 return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE); 625 626 if (timeout != PIPE_TIMEOUT_INFINITE) { 627 int64_t start_time = os_time_get(); 628 629 /* Convert to microseconds. */ 630 timeout /= 1000; 631 632 /* Wait in a loop. */ 633 while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) { 634 if (os_time_get() - start_time >= timeout) { 635 return FALSE; 636 } 637 os_time_sleep(10); 638 } 639 return TRUE; 640 } 641 642 ws->buffer_wait(rfence, RADEON_USAGE_READWRITE); 643 return TRUE; 644} 645 646static void radeon_fence_reference(struct pipe_fence_handle **dst, 647 struct pipe_fence_handle *src) 648{ 649 pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src); 650} 651 652void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) 653{ 654 ws->base.cs_create = radeon_drm_cs_create; 655 ws->base.cs_destroy = radeon_drm_cs_destroy; 656 ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; 657 ws->base.cs_get_reloc = radeon_drm_cs_get_reloc; 658 ws->base.cs_validate = radeon_drm_cs_validate; 659 ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; 660 ws->base.cs_flush = radeon_drm_cs_flush; 661 ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; 662 ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; 663 ws->base.fence_wait = radeon_fence_wait; 664 ws->base.fence_reference = radeon_fence_reference; 665} 666