radeon_drm_cs.c revision 900ac63ee88a16b7fb7f0ca2b03a40259b8ebd84
/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 *
 * Based on work from libdrm_radeon by:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */

/*
    This file replaces libdrm's radeon_cs_gem with our own implementation.
    It's optimized specifically for Radeon DRM.
    Reloc writes and space checking are faster and simpler than their
    counterparts in libdrm (the time complexity of all the functions
    is O(1) in nearly all scenarios, thanks to hashing).

    It works like this:

    cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and
    also adds the size of 'buf' to the used_gart and used_vram winsys variables
    based on the domains, which are simply OR'd for the accounting purposes.
    The adding is skipped if the reloc is already present in the list, but it
    still accounts for any newly-referenced domains.

    cs_validate is then called, which just checks:
        used_vram/gart < vram/gart_size * 0.8
    The 0.8 number allows for some memory fragmentation. If the validation
    fails, the pipe driver flushes the CS and tries to do the validation again,
    i.e. it validates only that one operation. If it fails again, it drops
    the operation on the floor and prints some nasty message to stderr.
    (done in the pipe driver)

    cs_write_reloc(cs, buf) just writes a reloc that has been added using
    cs_add_reloc. The read_domain and write_domain parameters have been removed,
    because we already specify them in cs_add_reloc.
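
    A rough sketch of that sequence, as driven from the pipe driver (the
    buffer, usage and domain values below are purely illustrative):

        cs_add_reloc(cs, buf, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
        if (!cs_validate(cs)) {
            flush the CS;
            cs_add_reloc(cs, buf, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
            if (!cs_validate(cs))
                drop the operation and report the error;
        }
        ...emit the command packet...
        cs_write_reloc(cs, buf);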
*/

#include "radeon_drm_cs.h"

#include "util/u_memory.h"
#include "os/os_time.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xf86drm.h>

/*
 * These are copied from radeon_drm; once an updated libdrm is released,
 * we should bump the configure.ac requirement for it and remove the
 * following defines.
 */
#ifndef RADEON_CHUNK_ID_FLAGS
#define RADEON_CHUNK_ID_FLAGS       0x03

/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif

#ifndef RADEON_CS_USE_VM
#define RADEON_CS_USE_VM            0x02
/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
#define RADEON_CS_RING_GFX          0
#define RADEON_CS_RING_COMPUTE      1
#endif

#ifndef RADEON_CS_RING_DMA
#define RADEON_CS_RING_DMA          2
#endif

#ifndef RADEON_CS_RING_UVD
#define RADEON_CS_RING_UVD          3
#endif

#ifndef RADEON_CS_END_OF_FRAME
#define RADEON_CS_END_OF_FRAME      0x04
#endif


#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))

static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
                                      struct radeon_drm_winsys *ws)
{
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
        FREE(csc->relocs_bo);
        return FALSE;
    }

    csc->chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
    csc->chunks[0].length_dw = 0;
    csc->chunks[0].chunk_data = (uint64_t)(uintptr_t)csc->buf;
    csc->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
    csc->chunks[1].length_dw = 0;
    csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    csc->chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
    csc->chunks[2].length_dw = 2;
    csc->chunks[2].chunk_data = (uint64_t)(uintptr_t)&csc->flags;

    csc->chunk_array[0] = (uint64_t)(uintptr_t)&csc->chunks[0];
    csc->chunk_array[1] = (uint64_t)(uintptr_t)&csc->chunks[1];
    csc->chunk_array[2] = (uint64_t)(uintptr_t)&csc->chunks[2];

    csc->cs.chunks = (uint64_t)(uintptr_t)csc->chunk_array;
    return TRUE;
}

static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
{
    unsigned i;

    for (i = 0; i < csc->crelocs; i++) {
        p_atomic_dec(&csc->relocs_bo[i]->num_cs_references);
        radeon_bo_reference(&csc->relocs_bo[i], NULL);
    }

    csc->crelocs = 0;
    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
    csc->used_vram = 0;
    memset(csc->is_handle_added, 0, sizeof(csc->is_handle_added));
}

static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
{
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
}


static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws,
                                                     enum ring_type ring_type,
                                                     struct radeon_winsys_cs_handle *trace_buf)
{
    struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
    struct radeon_drm_cs *cs;

    cs = CALLOC_STRUCT(radeon_drm_cs);
    if (!cs) {
        return NULL;
    }
    pipe_semaphore_init(&cs->flush_completed, 1);

    cs->ws = ws;
    cs->trace_buf = (struct radeon_bo*)trace_buf;

    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
        FREE(cs);
        return NULL;
    }
    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
        radeon_destroy_cs_context(&cs->csc1);
        FREE(cs);
        return NULL;
    }

    /* Set the first command buffer as current. */
    cs->csc = &cs->csc1;
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
}

#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value)

static INLINE void update_reloc_domains(struct drm_radeon_cs_reloc *reloc,
                                        enum radeon_bo_domain rd,
                                        enum radeon_bo_domain wd,
                                        enum radeon_bo_domain *added_domains)
{
    *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);

    reloc->read_domains |= rd;
    reloc->write_domain |= wd;
}

int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo)
{
    struct drm_radeon_cs_reloc *reloc;
    unsigned i;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);

    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle == bo->handle) {
            return i;
        }

        /* Hash collision, look for the BO in the list of relocs linearly. */
        for (i = csc->crelocs; i != 0;) {
            --i;
            reloc = &csc->relocs[i];
            if (reloc->handle == bo->handle) {
                /* Put this reloc in the hash list.
                 * This will prevent additional hash collisions if there are
                 * several consecutive get_reloc calls for the same buffer.
                 *
                 * Example: Assuming buffers A,B,C collide in the hash list,
                 * the following sequence of relocs:
                 *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
                 * will collide here: ^ and here:   ^,
                 * meaning that we should get very few collisions in the end. */
                csc->reloc_indices_hashlist[hash] = i;
                /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                return i;
            }
        }
    }

    return -1;
}

static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
                                 struct radeon_bo *bo,
                                 enum radeon_bo_usage usage,
                                 enum radeon_bo_domain domains,
                                 enum radeon_bo_domain *added_domains)
{
    struct radeon_cs_context *csc = cs->csc;
    struct drm_radeon_cs_reloc *reloc;
    unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1);
    enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0;
    enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0;
    bool update_hash = TRUE;
    int i;

    *added_domains = 0;
    if (csc->is_handle_added[hash]) {
        i = csc->reloc_indices_hashlist[hash];
        reloc = &csc->relocs[i];
        if (reloc->handle != bo->handle) {
            /* Hash collision, look for the BO in the list of relocs linearly. */
            for (i = csc->crelocs - 1; i >= 0; i--) {
                reloc = &csc->relocs[i];
                if (reloc->handle == bo->handle) {
                    /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/
                    break;
                }
            }
        }

        if (i >= 0) {
            /* On the DMA ring we need to emit as many relocations as there are
             * uses of the BO, so each call to this function must add the BO to
             * the relocation list again.
             *
             * Do not update the hash table for the DMA ring, so that the hash
             * always points to the first relocation of the BO, which is the one
             * the kernel uses for memory placement. The following relocations
             * are ignored by the kernel's memory placement, but the kernel still
             * uses them to patch the command stream with the proper buffer
             * offsets.
             */
            update_hash = FALSE;
            update_reloc_domains(reloc, rd, wd, added_domains);
            if (cs->base.ring_type != RING_DMA) {
                csc->reloc_indices_hashlist[hash] = i;
                return i;
            }
        }
    }

    /* New relocation, check if the backing array is large enough. */
    if (csc->crelocs >= csc->nrelocs) {
        uint32_t size;
        csc->nrelocs += 10;

        size = csc->nrelocs * sizeof(struct radeon_bo*);
        csc->relocs_bo = realloc(csc->relocs_bo, size);

        size = csc->nrelocs * sizeof(struct drm_radeon_cs_reloc);
        csc->relocs = realloc(csc->relocs, size);

        csc->chunks[1].chunk_data = (uint64_t)(uintptr_t)csc->relocs;
    }

    /* Initialize the new relocation. */
    csc->relocs_bo[csc->crelocs] = NULL;
    radeon_bo_reference(&csc->relocs_bo[csc->crelocs], bo);
    p_atomic_inc(&bo->num_cs_references);
    reloc = &csc->relocs[csc->crelocs];
    reloc->handle = bo->handle;
    reloc->read_domains = rd;
    reloc->write_domain = wd;
    reloc->flags = 0;

    csc->is_handle_added[hash] = TRUE;
    if (update_hash) {
        csc->reloc_indices_hashlist[hash] = csc->crelocs;
    }

    csc->chunks[1].length_dw += RELOC_DWORDS;

    *added_domains = rd | wd;
    return csc->crelocs++;
}

static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
                                        struct radeon_winsys_cs_handle *buf,
                                        enum radeon_bo_usage usage,
                                        enum radeon_bo_domain domains)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    enum radeon_bo_domain added_domains;
    unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains);

    if (added_domains & RADEON_DOMAIN_GTT)
        cs->csc->used_gart += bo->base.size;
    if (added_domains & RADEON_DOMAIN_VRAM)
        cs->csc->used_vram += bo->base.size;

    return index;
}

static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        cs->csc->used_gart < cs->ws->info.gart_size * 0.8 &&
        cs->csc->used_vram < cs->ws->info.vram_size * 0.8;

    if (status) {
        cs->csc->validated_crelocs = cs->csc->crelocs;
    } else {
        /* Remove lately-added relocations. The validation failed with them
         * and the CS is about to be flushed because of that. Keep only
         * the already-validated relocations. */
        unsigned i;

        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
        }
        cs->csc->crelocs = cs->csc->validated_crelocs;

        /* Flush if there are any relocs. Clean up otherwise. */
        if (cs->csc->crelocs) {
            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
        } else {
            radeon_cs_context_cleanup(cs->csc);

            assert(cs->base.cdw == 0);
            if (cs->base.cdw != 0) {
                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
            }
        }
    }
    return status;
}

static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, uint64_t vram, uint64_t gtt)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    boolean status =
        (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 &&
        (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7;

    return status;
}

static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
                                      struct radeon_winsys_cs_handle *buf)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)buf;
    unsigned index = radeon_get_reloc(cs->csc, bo);

    if (index == -1) {
        fprintf(stderr, "radeon: Cannot get a relocation in %s.\n", __func__);
        return;
    }

    OUT_CS(&cs->base, 0xc0001000);
    OUT_CS(&cs->base, index * RELOC_DWORDS);
}

void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc)
{
    unsigned i;

    if (drmCommandWriteRead(csc->fd, DRM_RADEON_CS,
                            &csc->cs, sizeof(struct drm_radeon_cs))) {
        if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) {
            unsigned i;

            fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n");
            for (i = 0; i < csc->chunks[0].length_dw; i++) {
                fprintf(stderr, "0x%08X\n", csc->buf[i]);
            }
        } else {
            fprintf(stderr, "radeon: The kernel rejected CS, "
                            "see dmesg for more information.\n");
        }
    }

    if (cs->trace_buf) {
        radeon_dump_cs_on_lockup(cs, csc);
    }

    for (i = 0; i < csc->crelocs; i++)
        p_atomic_dec(&csc->relocs_bo[i]->num_active_ioctls);

    radeon_cs_context_cleanup(csc);
}

/*
 * Make sure previous submissions of this CS have completed.
 */
void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    /* Wait for any pending ioctl to complete. */
    if (cs->ws->thread) {
        pipe_semaphore_wait(&cs->flush_completed);
        pipe_semaphore_signal(&cs->flush_completed);
    }
}

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", FALSE)

static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags, uint32_t cs_trace_id)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_cs_context *tmp;

    switch (cs->base.ring_type) {
    case RING_DMA:
        /* pad DMA ring to 8 DWs */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xf0000000); /* NOP packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x00000000); /* NOP packet */
        }
        break;
    case RING_GFX:
        /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements;
         * r6xx requires at least 4 dw alignment to avoid a hw bug.
         */
        if (cs->ws->info.chip_class <= SI) {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        } else {
            while (rcs->cdw & 7)
                OUT_CS(&cs->base, 0xffff1000); /* type3 nop packet */
        }
        break;
    case RING_UVD:
        while (rcs->cdw & 15)
            OUT_CS(&cs->base, 0x80000000); /* type2 nop packet */
        break;
    default:
        break;
    }

    if (rcs->cdw > RADEON_MAX_CMDBUF_DWORDS) {
        fprintf(stderr, "radeon: command stream overflowed\n");
    }

    radeon_drm_cs_sync_flush(rcs);

    /* Flip command streams. */
    tmp = cs->csc;
    cs->csc = cs->cst;
    cs->cst = tmp;

    cs->cst->cs_trace_id = cs_trace_id;

    /* If the CS is not empty and has not overflowed, emit it in a separate thread. */
    if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) {
        unsigned i, crelocs = cs->cst->crelocs;

        cs->cst->chunks[0].length_dw = cs->base.cdw;

        for (i = 0; i < crelocs; i++) {
            /* Update the number of active asynchronous CS ioctls for the buffer. */
            p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
        }

        switch (cs->base.ring_type) {
        case RING_DMA:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_DMA;
            cs->cst->cs.num_chunks = 3;
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
            }
            break;

        case RING_UVD:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_UVD;
            cs->cst->cs.num_chunks = 3;
            break;

        default:
        case RING_GFX:
            cs->cst->flags[0] = 0;
            cs->cst->flags[1] = RADEON_CS_RING_GFX;
            cs->cst->cs.num_chunks = 2;
            if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
                cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
                cs->cst->cs.num_chunks = 3;
            }
            if (cs->ws->info.r600_virtual_address) {
                cs->cst->flags[0] |= RADEON_CS_USE_VM;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_END_OF_FRAME) {
                cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME;
                cs->cst->cs.num_chunks = 3;
            }
            if (flags & RADEON_FLUSH_COMPUTE) {
                cs->cst->flags[1] = RADEON_CS_RING_COMPUTE;
                cs->cst->cs.num_chunks = 3;
            }
            break;
        }

        if (cs->ws->thread) {
            pipe_semaphore_wait(&cs->flush_completed);
            radeon_drm_ws_queue_cs(cs->ws, cs);
            if (!(flags & RADEON_FLUSH_ASYNC))
                radeon_drm_cs_sync_flush(rcs);
        } else {
            radeon_drm_cs_emit_ioctl_oneshot(cs, cs->cst);
        }
    } else {
        radeon_cs_context_cleanup(cs->cst);
    }

    /* Prepare a new CS. */
    cs->base.buf = cs->csc->buf;
    cs->base.cdw = 0;
}

static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    radeon_drm_cs_sync_flush(rcs);
    pipe_semaphore_destroy(&cs->flush_completed);
    radeon_cs_context_cleanup(&cs->csc1);
    radeon_cs_context_cleanup(&cs->csc2);
    p_atomic_dec(&cs->ws->num_cs);
    radeon_destroy_cs_context(&cs->csc1);
    radeon_destroy_cs_context(&cs->csc2);
    FREE(cs);
}

static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs,
                                    void (*flush)(void *ctx, unsigned flags),
                                    void *user)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);

    cs->flush_cs = flush;
    cs->flush_data = user;
}

static boolean radeon_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                       struct radeon_winsys_cs_handle *_buf,
                                       enum radeon_bo_usage usage)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct radeon_bo *bo = (struct radeon_bo*)_buf;
    int index;

    if (!bo->num_cs_references)
        return FALSE;

    index = radeon_get_reloc(cs->csc, bo);
    if (index == -1)
        return FALSE;

    if ((usage & RADEON_USAGE_WRITE) && cs->csc->relocs[index].write_domain)
        return TRUE;
    if ((usage & RADEON_USAGE_READ) && cs->csc->relocs[index].read_domains)
        return TRUE;

    return FALSE;
}

/* FENCES */

static struct pipe_fence_handle *
radeon_cs_create_fence(struct radeon_winsys_cs *rcs)
{
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
    struct pb_buffer *fence;

    /* Create a fence, which is a dummy BO. */
    fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE,
                                       RADEON_DOMAIN_GTT);
    /* Add the fence as a dummy relocation. */
    cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence),
                              RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT);
    return (struct pipe_fence_handle*)fence;
}

static bool radeon_fence_wait(struct radeon_winsys *ws,
                              struct pipe_fence_handle *fence,
                              uint64_t timeout)
{
    struct pb_buffer *rfence = (struct pb_buffer*)fence;

    if (timeout == 0)
        return !ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE);

    if (timeout != PIPE_TIMEOUT_INFINITE) {
        int64_t start_time = os_time_get();

        /* Convert to microseconds. */
        timeout /= 1000;

        /* Wait in a loop. */
        while (ws->buffer_is_busy(rfence, RADEON_USAGE_READWRITE)) {
            if (os_time_get() - start_time >= timeout) {
                return FALSE;
            }
            os_time_sleep(10);
        }
        return TRUE;
    }

    ws->buffer_wait(rfence, RADEON_USAGE_READWRITE);
    return TRUE;
}

static void radeon_fence_reference(struct pipe_fence_handle **dst,
                                   struct pipe_fence_handle *src)
{
    pb_reference((struct pb_buffer**)dst, (struct pb_buffer*)src);
}

void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
{
    ws->base.cs_create = radeon_drm_cs_create;
    ws->base.cs_destroy = radeon_drm_cs_destroy;
    ws->base.cs_add_reloc = radeon_drm_cs_add_reloc;
    ws->base.cs_validate = radeon_drm_cs_validate;
    ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit;
    ws->base.cs_write_reloc = radeon_drm_cs_write_reloc;
    ws->base.cs_flush = radeon_drm_cs_flush;
    ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
    ws->base.cs_create_fence = radeon_cs_create_fence;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
}
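
/*
 * Rough, illustrative sketch of how a winsys user might drive the hooks
 * installed above. It assumes an already-initialized struct radeon_winsys *rws
 * and a struct pb_buffer *buf obtained elsewhere; the callback name and
 * context pointer are placeholders.
 *
 *    struct radeon_winsys_cs *cs = rws->cs_create(rws, RING_GFX, NULL);
 *    rws->cs_set_flush_callback(cs, my_flush_callback, my_context);
 *
 *    rws->cs_add_reloc(cs, rws->buffer_get_cs_handle(buf),
 *                      RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
 *    if (rws->cs_validate(cs)) {
 *       ...write packets into cs->buf, advancing cs->cdw...
 *       rws->cs_write_reloc(cs, rws->buffer_get_cs_handle(buf));
 *    }
 *
 *    struct pipe_fence_handle *fence = rws->cs_create_fence(cs);
 *    rws->cs_flush(cs, 0, 0);
 *    rws->fence_wait(rws, fence, PIPE_TIMEOUT_INFINITE);
 *    rws->fence_reference(&fence, NULL);
 *    rws->cs_destroy(cs);
 */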