intel_bufmgr_gem.c revision b73612e4fd69565aa2c5c2e9677f3e0af1945f7d
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "libdrm_lists.h"
#include "intel_atomic.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"

#include "i915_drm.h"

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

/* Only cache objects up to 64MB.  Bigger than that, and the rounding of the
 * size makes many operations fail that wouldn't otherwise.
 */
#define DRM_INTEL_GEM_BO_BUCKETS	14
typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	char bo_reuse;
} drm_intel_bufmgr_gem;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 */
	unsigned int global_name;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/** Array of bos corresponding to relocs[i].target_handle */
	drm_intel_bo **reloc_target_bo;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	char included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	char used_as_reloc_target;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	char reusable;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
			    uint32_t *swizzle_mode);

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
			    uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem,
			   unsigned long size, uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (IS_I965G(bufmgr_gem))
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (IS_I9XX(bufmgr_gem)) {
		min_size = 1024 * 1024;
		max_size = 128 * 1024 * 1024;
	} else {
		min_size = 512 * 1024;
		max_size = 64 * 1024 * 1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}
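/*
 * Worked example for drm_intel_gem_bo_tile_size() above (illustrative only;
 * these values are not used anywhere in this file): a 600*1024-byte X-tiled
 * request on a chip older than the 9xx series falls between the 512KB
 * minimum and the 64MB maximum, so the power-of-two loop rounds it up to
 * 1MB.  On 965+, the same request is only page-aligned, and since 600*1024
 * is already a multiple of 4096 it is returned unchanged.
 */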
/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t tiling_mode)
{
	unsigned long tile_width = 512;
	unsigned long i;

	if (tiling_mode == I915_TILING_NONE)
		return ROUND_UP_TO(pitch, tile_width);

	/* 965 is flexible */
	if (IS_I965G(bufmgr_gem))
		return ROUND_UP_TO(pitch, tile_width);

	/* Pre-965 needs power of two tile width */
	for (i = tile_width; i < pitch; i <<= 1)
		;

	return i;
}

static struct drm_intel_gem_bo_bucket *
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
				 unsigned long size)
{
	int i;

	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		if (bucket->size >= size) {
			return bucket;
		}
	}

	return NULL;
}

static void
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i, j;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		if (bo_gem->relocs == NULL) {
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
			    bo_gem->name);
			continue;
		}

		for (j = 0; j < bo_gem->reloc_count; j++) {
			drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
			drm_intel_bo_gem *target_gem =
			    (drm_intel_bo_gem *) target_bo;

			DBG("%2d: %d (%s)@0x%08llx -> "
			    "%d (%s)@0x%08lx + 0x%08x\n",
			    i,
			    bo_gem->gem_handle, bo_gem->name,
			    (unsigned long long)bo_gem->relocs[j].offset,
			    target_gem->gem_handle,
			    target_gem->name,
			    target_bo->offset,
			    bo_gem->relocs[j].delta);
		}
	}
}

static inline void
drm_intel_gem_bo_reference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	atomic_inc(&bo_gem->refcount);
}

/**
 * Adds the given buffer to the list of buffers to be validated (moved into the
 * appropriate memory type) with the next batch submission.
 *
 * If a buffer is validated multiple times in a batch submission, it ends up
 * with the intersection of the memory type flags and the union of the
 * access flags.
 */
static void
drm_intel_add_validate_buffer(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int index;

	if (bo_gem->validate_index != -1)
		return;

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size-aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture.  Optimal packing is for wimps.
	 */
	size = bo_gem->bo.size;
	if (!IS_I965G(bufmgr_gem) && bo_gem->tiling_mode != I915_TILING_NONE)
		size *= 2;

	bo_gem->reloc_tree_size = size;
}

static int
drm_intel_setup_reloc_list(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	unsigned int max_relocs = bufmgr_gem->max_relocs;

	if (bo->size / 4 < max_relocs)
		max_relocs = bo->size / 4;

	bo_gem->relocs = malloc(max_relocs *
				sizeof(struct drm_i915_gem_relocation_entry));
	bo_gem->reloc_target_bo = malloc(max_relocs * sizeof(drm_intel_bo *));

	return 0;
}

static int
drm_intel_gem_bo_busy(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_busy busy;
	int ret;

	memset(&busy, 0, sizeof(busy));
	busy.handle = bo_gem->gem_handle;

	do {
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	} while (ret == -1 && errno == EINTR);

	return (ret == 0 && busy.busy);
}

static int
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
				  drm_intel_bo_gem *bo_gem, int state)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = bo_gem->gem_handle;
	madv.madv = state;
	madv.retained = 1;
	ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

	return madv.retained;
}

static int
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
{
	return drm_intel_gem_bo_madvise_internal
		((drm_intel_bufmgr_gem *) bo->bufmgr,
		 (drm_intel_bo_gem *) bo,
		 madv);
}

/* drop the oldest entries that have been purged by the kernel */
static void
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
				    struct drm_intel_gem_bo_bucket *bucket)
{
	while (!DRMLISTEMPTY(&bucket->head)) {
		drm_intel_bo_gem *bo_gem;

		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
				      bucket->head.next, head);
		if (drm_intel_gem_bo_madvise_internal
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
			break;

		DRMLISTDEL(&bo_gem->head);
		drm_intel_gem_bo_free(&bo_gem->bo);
	}
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
				const char *name,
				unsigned long size,
				unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	unsigned int page_size = getpagesize();
	int ret;
	struct drm_intel_gem_bo_bucket *bucket;
	int alloc_from_cache;
	unsigned long bo_size;
	int for_render = 0;

	if (flags & BO_ALLOC_FOR_RENDER)
		for_render = 1;

	/* Round the allocated size up to a power of two number of pages. */
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);

	/* If we don't have caching at this size, don't actually round the
	 * allocation up.
	 */
	if (bucket == NULL) {
		bo_size = size;
		if (bo_size < page_size)
			bo_size = page_size;
	} else {
		bo_size = bucket->size;
	}

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Get a buffer out of the cache if available */
retry:
	alloc_from_cache = 0;
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
		if (for_render) {
			/* Allocate new render-target BOs from the tail (MRU)
			 * of the list, as it will likely be hot in the GPU
			 * cache and in the aperture for us.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.prev, head);
			DRMLISTDEL(&bo_gem->head);
			alloc_from_cache = 1;
		} else {
			/* For non-render-target BOs (where we're probably
			 * going to map it first thing in order to fill it
			 * with data), check if the last BO in the cache is
			 * unbusy, and only reuse in that case.  Otherwise,
			 * allocating a new buffer is probably faster than
			 * waiting for the GPU to finish.
			 */
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
				alloc_from_cache = 1;
				DRMLISTDEL(&bo_gem->head);
			}
		}

		if (alloc_from_cache) {
			if (!drm_intel_gem_bo_madvise_internal
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
				drm_intel_gem_bo_free(&bo_gem->bo);
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
								    bucket);
				goto retry;
			}
		}
	}
	pthread_mutex_unlock(&bufmgr_gem->lock);

	if (!alloc_from_cache) {
		struct drm_i915_gem_create create;

		bo_gem = calloc(1, sizeof(*bo_gem));
		if (!bo_gem)
			return NULL;

		bo_gem->bo.size = bo_size;
		memset(&create, 0, sizeof(create));
		create.size = bo_size;

		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_CREATE,
				    &create);
		} while (ret == -1 && errno == EINTR);
		bo_gem->gem_handle = create.handle;
		bo_gem->bo.handle = bo_gem->gem_handle;
		if (ret != 0) {
			free(bo_gem);
			return NULL;
		}
		bo_gem->bo.bufmgr = bufmgr;
	}

	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->reloc_tree_fences = 0;
	bo_gem->used_as_reloc_target = 0;
	bo_gem->tiling_mode = I915_TILING_NONE;
	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
	bo_gem->reusable = 1;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create: buf %d (%s) %ldb\n",
	    bo_gem->gem_handle, bo_gem->name, size);

	return &bo_gem->bo;
}

static drm_intel_bo *
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned long size,
				  unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
					       BO_ALLOC_FOR_RENDER);
}

static drm_intel_bo *
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
		       const char *name,
		       unsigned long size,
		       unsigned int alignment)
{
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
}
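/*
 * Illustrative call sequence (a sketch, not part of this file): clients
 * normally reach the allocators above through the public wrappers declared
 * in intel_bufmgr.h; the buffer name and size here are arbitrary example
 * values.
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "vertices", 4096, 0);
 *	if (bo != NULL) {
 *		... use the buffer ...
 *		drm_intel_bo_unreference(bo);
 *	}
 */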
static drm_intel_bo *
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
			     int x, int y, int cpp, uint32_t *tiling_mode,
			     unsigned long *pitch, unsigned long flags)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo *bo;
	unsigned long size, stride, aligned_y = y;
	int ret;

	if (*tiling_mode == I915_TILING_NONE)
		aligned_y = ALIGN(y, 2);
	else if (*tiling_mode == I915_TILING_X)
		aligned_y = ALIGN(y, 8);
	else if (*tiling_mode == I915_TILING_Y)
		aligned_y = ALIGN(y, 32);

	stride = x * cpp;
	stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
	size = stride * aligned_y;
	size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);

	bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
	if (!bo)
		return NULL;

	ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(bo);
		return NULL;
	}

	*pitch = stride;

	return bo;
}

/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
drm_intel_bo *
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
				  const char *name,
				  unsigned int handle)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	drm_intel_bo_gem *bo_gem;
	int ret;
	struct drm_gem_open open_arg;
	struct drm_i915_gem_get_tiling get_tiling;

	bo_gem = calloc(1, sizeof(*bo_gem));
	if (!bo_gem)
		return NULL;

	memset(&open_arg, 0, sizeof(open_arg));
	open_arg.name = handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_GEM_OPEN,
			    &open_arg);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
			name, handle, strerror(errno));
		free(bo_gem);
		return NULL;
	}
	bo_gem->bo.size = open_arg.size;
	bo_gem->bo.offset = 0;
	bo_gem->bo.virtual = NULL;
	bo_gem->bo.bufmgr = bufmgr;
	bo_gem->name = name;
	atomic_set(&bo_gem->refcount, 1);
	bo_gem->validate_index = -1;
	bo_gem->gem_handle = open_arg.handle;
	bo_gem->global_name = handle;
	bo_gem->reusable = 0;

	memset(&get_tiling, 0, sizeof(get_tiling));
	get_tiling.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
	if (ret != 0) {
		drm_intel_gem_bo_unreference(&bo_gem->bo);
		return NULL;
	}
	bo_gem->tiling_mode = get_tiling.tiling_mode;
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
	if (bo_gem->tiling_mode == I915_TILING_NONE)
		bo_gem->reloc_tree_fences = 0;
	else
		bo_gem->reloc_tree_fences = 1;
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);

	return &bo_gem->bo;
}
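/*
 * Illustrative sharing flow (a sketch; it assumes the global name is passed
 * between the two processes out of band, e.g. over the X protocol): the
 * exporting process obtains a name with drm_intel_bo_flink() and the
 * importing process rewraps it with the function above.
 *
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);
 *	... hand name to the other process ...
 *	drm_intel_bo *shared =
 *	    drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */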
static void
drm_intel_gem_bo_free(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_close close;
	int ret;

	if (bo_gem->mem_virtual)
		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
	if (bo_gem->gtt_virtual)
		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);

	free(bo_gem->reloc_target_bo);
	free(bo_gem->relocs);

	/* Close this object */
	memset(&close, 0, sizeof(close));
	close.handle = bo_gem->gem_handle;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
	if (ret != 0) {
		fprintf(stderr,
			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
			bo_gem->gem_handle, bo_gem->name, strerror(errno));
	}
	free(bo);
}

/** Frees all cached buffers significantly older than @time. */
static void
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
{
	int i;

	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];

		while (!DRMLISTEMPTY(&bucket->head)) {
			drm_intel_bo_gem *bo_gem;

			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			if (time - bo_gem->free_time <= 1)
				break;

			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}
}

static void
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_intel_gem_bo_bucket *bucket;
	uint32_t tiling_mode;
	int i;

	/* Unreference all the target buffers */
	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_gem_bo_unreference_locked_timed(bo_gem->
							  reloc_target_bo[i],
							  time);
	}
	bo_gem->reloc_count = 0;
	bo_gem->used_as_reloc_target = 0;

	DBG("bo_unreference final: %d (%s)\n",
	    bo_gem->gem_handle, bo_gem->name);

	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
	/* Put the buffer into our internal cache for reuse if we can. */
	tiling_mode = I915_TILING_NONE;
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
					      I915_MADV_DONTNEED)) {
		bo_gem->free_time = time;

		bo_gem->name = NULL;
		bo_gem->validate_index = -1;

		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);

		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
	} else {
		drm_intel_gem_bo_free(bo);
	}
}

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount))
		drm_intel_gem_bo_unreference_final(bo, time);
}

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	assert(atomic_read(&bo_gem->refcount) > 0);
	if (atomic_dec_and_test(&bo_gem->refcount)) {
		drm_intel_bufmgr_gem *bufmgr_gem =
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
		struct timespec time;

		clock_gettime(CLOCK_MONOTONIC, &time);

		pthread_mutex_lock(&bufmgr_gem->lock);
		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
		pthread_mutex_unlock(&bufmgr_gem->lock);
	}
}

static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Allow recursive mapping.  Mesa may recursively map buffers with
	 * nested display loops.
	 */
	if (!bo_gem->mem_virtual) {
		struct drm_i915_gem_mmap mmap_arg;

		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bo->size;
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__, bo_gem->gem_handle,
				bo_gem->name, strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
	}
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->mem_virtual);
	bo->virtual = bo_gem->mem_virtual;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	if (write_enable)
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
	else
		set_domain.write_domain = 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
		pthread_mutex_unlock(&bufmgr_gem->lock);
		return ret;
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Get a mapping of the buffer if we haven't before. */
	if (bo_gem->gtt_virtual == NULL) {
		struct drm_i915_gem_mmap_gtt mmap_arg;

		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
		    bo_gem->name);

		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = bo_gem->gem_handle;

		/* Get the fake offset back... */
		do {
			ret = ioctl(bufmgr_gem->fd,
				    DRM_IOCTL_I915_GEM_MMAP_GTT,
				    &mmap_arg);
		} while (ret == -1 && errno == EINTR);
		if (ret != 0) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}

		/* and mmap it */
		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
					   MAP_SHARED, bufmgr_gem->fd,
					   mmap_arg.offset);
		if (bo_gem->gtt_virtual == MAP_FAILED) {
			ret = -errno;
			fprintf(stderr,
				"%s:%d: Error mapping buffer %d (%s): %s .\n",
				__FILE__, __LINE__,
				bo_gem->gem_handle, bo_gem->name,
				strerror(errno));
			pthread_mutex_unlock(&bufmgr_gem->lock);
			return ret;
		}
	}

	bo->virtual = bo_gem->gtt_virtual;

	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
	    bo_gem->gtt_virtual);

	/* Now move it to the GTT domain so that the CPU caches are flushed */
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			strerror(errno));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	assert(bo_gem->gtt_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);
	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_sw_finish sw_finish;
	int ret;

	if (bo == NULL)
		return 0;

	assert(bo_gem->mem_virtual != NULL);

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Cause a flush to happen if the buffer's pinned for scanout, so the
	 * results show up in a timely manner.
	 */
	sw_finish.handle = bo_gem->gem_handle;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SW_FINISH,
			    &sw_finish);
	} while (ret == -1 && errno == EINTR);

	bo->virtual = NULL;
	pthread_mutex_unlock(&bufmgr_gem->lock);
	return 0;
}

static int
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
			 unsigned long size, const void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pwrite pwrite;
	int ret;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = bo_gem->gem_handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PWRITE,
			    &pwrite);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}
	return ret;
}

static int
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
	int ret;

	get_pipe_from_crtc_id.crtc_id = crtc_id;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
		    &get_pipe_from_crtc_id);
	if (ret != 0) {
		/* We return -1 here to signal that we don't
		 * know which pipe is associated with this crtc.
		 * This lets the caller know that this information
		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up
		 */
		return -1;
	}

	return get_pipe_from_crtc_id.pipe;
}

static int
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
			     unsigned long size, void *data)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pread pread;
	int ret;

	memset(&pread, 0, sizeof(pread));
	pread.handle = bo_gem->gem_handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uint64_t) (uintptr_t) data;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PREAD,
			    &pread);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		ret = -errno;
		fprintf(stderr,
			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
			(int)size, strerror(errno));
	}
	return ret;
}

/** Waits for all GPU rendering to the object to have completed. */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 0);
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
			    &set_domain);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		fprintf(stderr,
			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
			__FILE__, __LINE__, bo_gem->gem_handle,
			set_domain.read_domains, set_domain.write_domain,
			strerror(errno));
	}
}

static void
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
	int i;

	free(bufmgr_gem->exec_objects);
	free(bufmgr_gem->exec_bos);

	pthread_mutex_destroy(&bufmgr_gem->lock);

	/* Free any cached buffer objects we were going to reuse */
	for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
		struct drm_intel_gem_bo_bucket *bucket =
		    &bufmgr_gem->cache_bucket[i];
		drm_intel_bo_gem *bo_gem;

		while (!DRMLISTEMPTY(&bucket->head)) {
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
					      bucket->head.next, head);
			DRMLISTDEL(&bo_gem->head);

			drm_intel_gem_bo_free(&bo_gem->bo);
		}
	}

	free(bufmgr);
}

/**
 * Adds the target buffer to the validation list and adds the relocation
 * to the reloc_buffer's relocation list.
 *
 * The relocation entry at the given offset must already contain the
 * precomputed relocation value, because the kernel will optimize out
 * the relocation entry write when the buffer hasn't moved from the
 * last known offset in target_bo.
 */
static int
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
			    drm_intel_bo *target_bo, uint32_t target_offset,
			    uint32_t read_domains, uint32_t write_domain)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	pthread_mutex_lock(&bufmgr_gem->lock);

	/* Create a new relocation list if needed */
	if (bo_gem->relocs == NULL)
		drm_intel_setup_reloc_list(bo);

	/* Check overflow */
	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);

	/* Check args */
	assert(offset <= bo->size - 4);
	assert((write_domain & (write_domain - 1)) == 0);

	/* Make sure that we're not adding a reloc to something whose size has
	 * already been accounted for.
	 */
	assert(!bo_gem->used_as_reloc_target);
	bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;

	/* Flag the target to disallow further relocations in it. */
	target_bo_gem->used_as_reloc_target = 1;

	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
	bo_gem->relocs[bo_gem->reloc_count].target_handle =
	    target_bo_gem->gem_handle;
	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;

	bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
	drm_intel_gem_bo_reference(target_bo);

	bo_gem->reloc_count++;

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}
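/*
 * Illustrative caller pattern for the relocation API above (a sketch;
 * "batch", "n" and "delta" are assumed names from a hypothetical
 * batchbuffer writer): the caller stores the presumed address in the batch
 * and then records the relocation so the kernel can patch it if target_bo
 * moves.
 *
 *	batch[n] = target_bo->offset + delta;
 *	drm_intel_bo_emit_reloc(batch_bo, n * 4, target_bo, delta,
 *				I915_GEM_DOMAIN_RENDER, 0);
 */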
/**
 * Walk the tree of relocations rooted at BO and accumulate the list of
 * validations to be performed and update the relocation buffers with
 * index values into the validation list.
 */
static void
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo_gem->relocs == NULL)
		return;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];

		/* Continue walking the tree depth-first. */
		drm_intel_gem_bo_process_reloc(target_bo);

		/* Add the target to the validate list */
		drm_intel_add_validate_buffer(target_bo);
	}
}

static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
	int i;

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Update the buffer offset */
		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
			    bo_gem->gem_handle, bo_gem->name, bo->offset,
			    (unsigned long long)bufmgr_gem->exec_objects[i].
			    offset);
			bo->offset = bufmgr_gem->exec_objects[i].offset;
		}
	}
}

static int
drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
		      drm_clip_rect_t *cliprects, int num_cliprects, int DR4)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	struct drm_i915_gem_execbuffer execbuf;
	int ret, i;

	pthread_mutex_lock(&bufmgr_gem->lock);
	/* Update indices and set up the validate list. */
	drm_intel_gem_bo_process_reloc(bo);

	/* Add the batch buffer to the validation list.  There are no
	 * relocations pointing to it.
	 */
	drm_intel_add_validate_buffer(bo);

	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
	execbuf.buffer_count = bufmgr_gem->exec_count;
	execbuf.batch_start_offset = 0;
	execbuf.batch_len = used;
	execbuf.cliprects_ptr = (uintptr_t) cliprects;
	execbuf.num_cliprects = num_cliprects;
	execbuf.DR1 = 0;
	execbuf.DR4 = DR4;

	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_EXECBUFFER,
			    &execbuf);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0) {
		ret = -errno;
		if (errno == ENOSPC) {
			fprintf(stderr,
				"Execbuffer fails to pin. "
				"Estimate: %u. Actual: %u. Available: %u\n",
				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
								   bufmgr_gem->exec_count),
				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
								  bufmgr_gem->exec_count),
				(unsigned int)bufmgr_gem->gtt_size);
		}
	}
	drm_intel_update_buffer_offsets(bufmgr_gem);

	if (bufmgr_gem->bufmgr.debug)
		drm_intel_gem_dump_validation_list(bufmgr_gem);

	for (i = 0; i < bufmgr_gem->exec_count; i++) {
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

		/* Disconnect the buffer from the validate list */
		bo_gem->validate_index = -1;
		bufmgr_gem->exec_bos[i] = NULL;
	}
	bufmgr_gem->exec_count = 0;
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_pin pin;
	int ret;

	memset(&pin, 0, sizeof(pin));
	pin.handle = bo_gem->gem_handle;
	pin.alignment = alignment;

	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_PIN,
			    &pin);
	} while (ret == -1 && errno == EINTR);

	if (ret != 0)
		return -errno;

	bo->offset = pin.offset;
	return 0;
}

static int
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_unpin unpin;
	int ret;

	memset(&unpin, 0, sizeof(unpin));
	unpin.handle = bo_gem->gem_handle;

	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
	if (ret != 0)
		return -errno;

	return 0;
}

static int
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
			    uint32_t stride)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
		return 0;

	/* If we're going from non-tiling to tiling, bump fence count */
	if (bo_gem->tiling_mode == I915_TILING_NONE)
		bo_gem->reloc_tree_fences++;

	memset(&set_tiling, 0, sizeof(set_tiling));
	set_tiling.handle = bo_gem->gem_handle;
	set_tiling.tiling_mode = *tiling_mode;
	set_tiling.stride = stride;

	do {
		ret = ioctl(bufmgr_gem->fd,
			    DRM_IOCTL_I915_GEM_SET_TILING,
			    &set_tiling);
	} while (ret == -1 && errno == EINTR);
	if (ret != 0) {
		*tiling_mode = bo_gem->tiling_mode;
		return -errno;
	}
	bo_gem->tiling_mode = set_tiling.tiling_mode;
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;

	/* If we're going from tiling to non-tiling, drop fence count */
	if (bo_gem->tiling_mode == I915_TILING_NONE)
		bo_gem->reloc_tree_fences--;

	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

	*tiling_mode = bo_gem->tiling_mode;
	return 0;
}
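/*
 * Illustrative tiling request (a sketch; "stride" is an assumed
 * caller-computed pitch): because the kernel may refuse or adjust the
 * requested mode, callers should re-check the value written back through
 * tiling_mode rather than assume the request was honored.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	drm_intel_bo_set_tiling(bo, &tiling, stride);
 *	if (tiling != I915_TILING_X)
 *		... fall back to a linear layout ...
 */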
static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
			    uint32_t *swizzle_mode)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	*tiling_mode = bo_gem->tiling_mode;
	*swizzle_mode = bo_gem->swizzle_mode;
	return 0;
}

static int
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t *name)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_gem_flink flink;
	int ret;

	if (!bo_gem->global_name) {
		memset(&flink, 0, sizeof(flink));
		flink.handle = bo_gem->gem_handle;

		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
		if (ret != 0)
			return -errno;
		bo_gem->global_name = flink.name;
		bo_gem->reusable = 0;
	}

	*name = bo_gem->global_name;
	return 0;
}

/**
 * Enables unlimited caching of buffer objects for reuse.
 *
 * This is potentially very memory expensive, as the cache at each bucket
 * size is only bounded by how many buffers of that size we've managed to have
 * in flight at once.
 */
void
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;

	bufmgr_gem->bo_reuse = 1;
}

/**
 * Return the additional aperture space required by the tree of buffer objects
 * rooted at bo.
 */
static int
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;
	int total = 0;

	if (bo == NULL || bo_gem->included_in_check_aperture)
		return 0;

	total += bo->size;
	bo_gem->included_in_check_aperture = 1;

	for (i = 0; i < bo_gem->reloc_count; i++)
		total +=
		    drm_intel_gem_bo_get_aperture_space(bo_gem->
							reloc_target_bo[i]);

	return total;
}

/**
 * Count the number of buffers in this list that need a fence reg
 *
 * If the count is greater than the number of available regs, we'll have
 * to ask the caller to resubmit a batch with fewer tiled buffers.
 *
 * This function over-counts if the same buffer is used multiple times.
 */
static unsigned int
drm_intel_gem_total_fences(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];

		if (bo_gem == NULL)
			continue;

		total += bo_gem->reloc_tree_fences;
	}
	return total;
}

/**
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
 * for the next drm_intel_bufmgr_check_aperture_space() call.
 */
static void
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	if (bo == NULL || !bo_gem->included_in_check_aperture)
		return;

	bo_gem->included_in_check_aperture = 0;

	for (i = 0; i < bo_gem->reloc_count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
							   reloc_target_bo[i]);
}

/**
 * Return a conservative estimate for the amount of aperture required
 * for a collection of buffers.  This may double-count some buffers.
 */
static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
		if (bo_gem != NULL)
			total += bo_gem->reloc_tree_size;
	}
	return total;
}

/**
 * Return the amount of aperture needed for a collection of buffers.
 * This avoids double counting any buffers, at the cost of looking
 * at every buffer in the set.
 */
static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
{
	int i;
	unsigned int total = 0;

	for (i = 0; i < count; i++) {
		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
		/* For the first buffer object in the array, we get an
		 * accurate count back for its reloc_tree size (since nothing
		 * had been flagged as being counted yet).  We can save that
		 * value out as a more conservative reloc_tree_size that
		 * avoids double-counting target buffers.  Since the first
		 * buffer happens to usually be the batch buffer in our
		 * callers, this can pull us back from doing the tree
		 * walk on every new batch emit.
		 */
		if (i == 0) {
			drm_intel_bo_gem *bo_gem =
			    (drm_intel_bo_gem *) bo_array[i];
			bo_gem->reloc_tree_size = total;
		}
	}

	for (i = 0; i < count; i++)
		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
	return total;
}

/**
 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
 * emit rendering referencing the buffers pointed to by bo_array.
 *
 * This is required because if we try to emit a batchbuffer with relocations
 * to a tree of buffers that won't simultaneously fit in the aperture,
 * the rendering will return an error at a point where the software is not
 * prepared to recover from it.
 *
 * However, we also want to emit the batchbuffer significantly before we reach
 * the limit, as a series of batchbuffers each of which references buffers
 * covering almost all of the aperture means that at each emit we end up
 * waiting to evict a buffer from the last rendering, and we get synchronous
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
 * get better parallelism.
 */
static int
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
{
	drm_intel_bufmgr_gem *bufmgr_gem =
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
	unsigned int total = 0;
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
	int total_fences;

	/* Check for fence reg constraints if necessary */
	if (bufmgr_gem->available_fences) {
		total_fences = drm_intel_gem_total_fences(bo_array, count);
		if (total_fences > bufmgr_gem->available_fences)
			return -ENOSPC;
	}

	total = drm_intel_gem_estimate_batch_space(bo_array, count);

	if (total > threshold)
		total = drm_intel_gem_compute_batch_space(bo_array, count);

	if (total > threshold) {
		DBG("check_space: overflowed available aperture, "
		    "%dkb vs %dkb\n",
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
		return -ENOSPC;
	} else {
		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
		    (int)bufmgr_gem->gtt_size / 1024);
		return 0;
	}
}

/*
 * Disable buffer reuse for objects which are shared with the kernel
 * as scanout buffers
 */
static int
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;

	bo_gem->reusable = 0;
	return 0;
}

static int
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int i;

	for (i = 0; i < bo_gem->reloc_count; i++) {
		if (bo_gem->reloc_target_bo[i] == target_bo)
			return 1;
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_bo[i],
						 target_bo))
			return 1;
	}

	return 0;
}

/** Return true if target_bo is referenced by bo's relocation tree. */
static int
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
{
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;

	if (bo == NULL || target_bo == NULL)
		return 0;
	if (target_bo_gem->used_as_reloc_target)
		return _drm_intel_gem_bo_references(bo, target_bo);
	return 0;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate,
 * map, and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
drm_intel_bufmgr *
drm_intel_bufmgr_gem_init(int fd, int batch_size)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	struct drm_i915_gem_get_aperture aperture;
	drm_i915_getparam_t gp;
	int ret, i;
	unsigned long size;

	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
	bufmgr_gem->fd = fd;

	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
		free(bufmgr_gem);
		return NULL;
	}

	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);

	if (ret == 0)
		bufmgr_gem->gtt_size = aperture.aper_available_size;
	else {
		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
			strerror(errno));
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
		fprintf(stderr, "Assuming %dkB available aperture size.\n"
			"May lead to reduced performance or incorrect "
			"rendering.\n",
			(int)bufmgr_gem->gtt_size / 1024);
	}

	gp.param = I915_PARAM_CHIPSET_ID;
	gp.value = &bufmgr_gem->pci_device;
	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
	if (ret) {
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
	}

	if (!IS_I965G(bufmgr_gem)) {
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
		gp.value = &bufmgr_gem->available_fences;
		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
		if (ret) {
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
				errno);
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
				*gp.value);
			bufmgr_gem->available_fences = 0;
		}
	}

	/* Let's go with one relocation per every 2 dwords (but round down a bit
	 * since a power of two will mean an extra page allocation for the reloc
	 * buffer).
	 *
	 * Every 4 was too few for the blender benchmark.
	 */
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;

	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
	bufmgr_gem->bufmgr.bo_alloc_for_render =
	    drm_intel_gem_bo_alloc_for_render;
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
	bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
	bufmgr_gem->bufmgr.debug = 0;
	bufmgr_gem->bufmgr.check_aperture_space =
	    drm_intel_gem_check_aperture_space;
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
	    drm_intel_gem_get_pipe_from_crtc_id;
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;

	/* Initialize the linked lists for BO reuse cache. */
	for (i = 0, size = 4096; i < DRM_INTEL_GEM_BO_BUCKETS; i++, size *= 2) {
		DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
		bufmgr_gem->cache_bucket[i].size = size;
	}

	return &bufmgr_gem->bufmgr;
}